chore: archive inactive projects to apps-archived/

Move inactive projects out of active workspace:
- bauntown (community website)
- maerchenzauber (AI story generation)
- memoro (voice memo app)
- news (news aggregation)
- nutriphi (nutrition tracking)
- reader (reading app)
- uload (URL shortener)
- wisekeep (AI wisdom extraction)

Update CLAUDE.md documentation:
- Add presi to active projects
- Document archived projects section
- Update workspace configuration

Archived apps can be re-activated by moving them back to apps/

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Till-JS 2025-11-29 07:03:59 +01:00
parent b97149ac12
commit 61d181fbc2
3148 changed files with 437 additions and 46640 deletions

View file

@ -0,0 +1,24 @@
import { Module } from '@nestjs/common';
import { ConfigModule } from '@nestjs/config';
import { TranscriptionModule } from './transcription/transcription.module';
import { PlaylistModule } from './playlist/playlist.module';
import { YoutubeModule } from './youtube/youtube.module';
import { WhisperModule } from './whisper/whisper.module';
import { WebsocketModule } from './websocket/websocket.module';
import { HealthModule } from './health/health.module';
/**
 * Root application module of the transcriber backend.
 *
 * Registers ConfigModule (configured as global, reading `.env`) and wires in the
 * feature modules for transcription, playlists, YouTube access, Whisper,
 * websocket progress updates and health checks.
 */
@Module({
imports: [
// Configuration is loaded once here; `isGlobal: true` is passed so it is registered globally.
ConfigModule.forRoot({
isGlobal: true,
envFilePath: '.env',
}),
TranscriptionModule,
PlaylistModule,
YoutubeModule,
WhisperModule,
WebsocketModule,
HealthModule,
],
})
export class AppModule {}

View file

@ -0,0 +1,30 @@
import { Controller, Get } from '@nestjs/common';
/**
 * Health endpoints mounted under `health`.
 *
 * Every response carries a `status` string and an ISO-8601 `timestamp`; the
 * root endpoint additionally reports the service name and version.
 */
@Controller('health')
export class HealthController {
  /** General health report including service name and version. */
  @Get()
  check() {
    return {
      ...this.stamp('ok'),
      service: 'transcriber-backend',
      version: '1.0.0',
    };
  }

  /** Readiness endpoint (`health/ready`). */
  @Get('ready')
  ready() {
    return this.stamp('ready');
  }

  /** Liveness endpoint (`health/live`). */
  @Get('live')
  live() {
    return this.stamp('alive');
  }

  /** Builds the `{ status, timestamp }` pair shared by all endpoints. */
  private stamp(status: string) {
    return { status, timestamp: new Date().toISOString() };
  }
}

View file

@ -0,0 +1,7 @@
import { Module } from '@nestjs/common';
import { HealthController } from './health.controller';
/** Feature module that exposes the health-check controller. */
@Module({
controllers: [HealthController],
})
export class HealthModule {}

View file

@ -0,0 +1,31 @@
import { NestFactory } from '@nestjs/core';
import { ValidationPipe } from '@nestjs/common';
import { AppModule } from './app.module';
/**
 * Creates the Nest application, configures CORS and request validation, and
 * starts listening on `PORT` (default 3006).
 *
 * CORS is restricted to the local dev origins listed below. The global
 * ValidationPipe strips unknown properties (`whitelist`), rejects requests
 * that contain them (`forbidNonWhitelisted`) and transforms payloads into
 * their DTO classes (`transform`).
 */
async function bootstrap(): Promise<void> {
  const app = await NestFactory.create(AppModule);

  app.enableCors({
    origin: [
      'http://localhost:5173', // SvelteKit dev
      'http://localhost:4321', // Astro dev
      'http://localhost:3000', // Alternative dev
    ],
    credentials: true,
  });

  app.useGlobalPipes(
    new ValidationPipe({
      whitelist: true,
      transform: true,
      forbidNonWhitelisted: true,
    })
  );

  // `||` (not `??`) on purpose: an empty PORT variable should also fall back to the default.
  const port = process.env.PORT || 3006;
  await app.listen(port);
  console.log(`[Transcriber Backend] Running on http://localhost:${port}`);
}

// Do not leave the startup promise floating: report the failure with context
// and exit with a non-zero code so supervisors notice the failed start.
bootstrap().catch((error: unknown) => {
  console.error('[Transcriber Backend] Failed to start:', error);
  process.exit(1);
});

View file

@ -0,0 +1,37 @@
import { Controller, Get, Post, Delete, Param, Body } from '@nestjs/common';
import { PlaylistService, CreatePlaylistDto } from './playlist.service';
/**
 * REST endpoints for playlists, mounted under `playlist`.
 *
 * Every handler delegates directly to PlaylistService; playlists are addressed
 * by the `:category/:name` pair taken from the URL.
 */
@Controller('playlist')
export class PlaylistController {
constructor(private readonly playlistService: PlaylistService) {}
/** Lists all playlists. */
@Get()
async getAll() {
return this.playlistService.getAll();
}
/** Returns a single playlist identified by category and name. */
@Get(':category/:name')
async getOne(@Param('category') category: string, @Param('name') name: string) {
return this.playlistService.getOne(category, name);
}
/** Creates a playlist from the request body. */
@Post()
async create(@Body() dto: CreatePlaylistDto) {
return this.playlistService.create(dto);
}
/** Deletes a playlist and answers with a confirmation message. */
@Delete(':category/:name')
async delete(@Param('category') category: string, @Param('name') name: string) {
await this.playlistService.delete(category, name);
return { message: 'Playlist deleted' };
}
/** Appends the `url` field of the request body to an existing playlist. */
@Post(':category/:name/url')
async addUrl(
@Param('category') category: string,
@Param('name') name: string,
// NOTE(review): `url` is read straight from the body; no validation happens in this controller.
@Body('url') url: string
) {
return this.playlistService.addUrl(category, name, url);
}
}

View file

@ -0,0 +1,10 @@
import { Module } from '@nestjs/common';
import { PlaylistController } from './playlist.controller';
import { PlaylistService } from './playlist.service';
/** Feature module for playlists; PlaylistService is exported for use by other modules. */
@Module({
controllers: [PlaylistController],
providers: [PlaylistService],
exports: [PlaylistService],
})
export class PlaylistModule {}

View file

@ -0,0 +1,173 @@
import { Injectable, Logger, NotFoundException } from '@nestjs/common';
import { ConfigService } from '@nestjs/config';
import * as fs from 'fs';
import * as path from 'path';
/** A playlist as read from (or written to) a `.txt` file below the playlists directory. */
export interface Playlist {
// Name of the sub-directory the playlist file lives in.
category: string;
// File name without the `.txt` extension.
name: string;
// Path of the playlist file, built from the playlists directory, category and name.
path: string;
// Number of entries in `urls`.
urlCount: number;
// Non-empty, non-comment lines of the file.
urls: string[];
// Text of the first `# ` comment line in the file, if any.
description?: string;
}
/** Request payload for creating a playlist. */
export interface CreatePlaylistDto {
// Either `category/name` or just `name`; without a category the service uses `general`.
name: string;
// Optional description, stored as the first comment line of the file.
description?: string;
// URLs to store, written one per line.
urls: string[];
}
/**
 * File-backed playlist store.
 *
 * Each playlist is a text file at `<PLAYLISTS_DIR>/<category>/<name>.txt`
 * (default directory `./data/playlists`) with an optional `# <description>`
 * line, a fixed `# One URL per line` header, and one URL per line.
 *
 * `category` and `name` come from request parameters, so every path is built
 * through {@link PlaylistService.resolvePlaylistPath}, which refuses segments
 * that could escape the playlists directory.
 */
@Injectable()
export class PlaylistService {
  private static readonly FILE_EXTENSION = '.txt';
  /** Header comment written into every file; never interpreted as a description. */
  private static readonly HEADER_COMMENT = '# One URL per line';

  private readonly logger = new Logger(PlaylistService.name);
  private readonly playlistsDir: string;

  constructor(private configService: ConfigService) {
    this.playlistsDir = this.configService.get<string>('PLAYLISTS_DIR') || './data/playlists';
    // Ensure playlists directory exists
    if (!fs.existsSync(this.playlistsDir)) {
      fs.mkdirSync(this.playlistsDir, { recursive: true });
    }
  }

  /**
   * Lists every `*.txt` playlist found in the category sub-directories.
   *
   * @returns all playlists, or an empty array when the playlists directory is missing
   */
  async getAll(): Promise<Playlist[]> {
    if (!fs.existsSync(this.playlistsDir)) {
      return [];
    }

    const extension = PlaylistService.FILE_EXTENSION;
    const playlists: Playlist[] = [];
    const categories = fs
      .readdirSync(this.playlistsDir, { withFileTypes: true })
      .filter((entry) => entry.isDirectory());

    for (const category of categories) {
      const categoryPath = path.join(this.playlistsDir, category.name);
      const files = fs.readdirSync(categoryPath).filter((file) => file.endsWith(extension));
      for (const file of files) {
        // Strip only the trailing extension; `replace('.txt', '')` would hit the first occurrence.
        const name = file.slice(0, -extension.length);
        playlists.push(this.readPlaylist(category.name, name, path.join(categoryPath, file)));
      }
    }
    return playlists;
  }

  /**
   * Reads a single playlist.
   *
   * @throws NotFoundException when the playlist does not exist or the
   *   category/name cannot identify a file inside the playlists directory
   */
  async getOne(category: string, name: string): Promise<Playlist> {
    const filePath = this.resolvePlaylistPath(category, name);
    if (!filePath || !fs.existsSync(filePath)) {
      throw new NotFoundException(`Playlist ${category}/${name} not found`);
    }
    return this.readPlaylist(category, name, filePath);
  }

  /**
   * Creates (or overwrites) a playlist file.
   *
   * `dto.name` is either `category/name` or a bare name, in which case the
   * category is `general`. As before, segments after the second are ignored.
   *
   * @throws Error when the category or name is empty, is `.`/`..`, or contains
   *   a backslash or NUL character. NOTE(review): this surfaces as HTTP 500;
   *   consider mapping it to a 400 response.
   */
  async create(dto: CreatePlaylistDto): Promise<Playlist> {
    // Parse category/name format
    const parts = dto.name.split('/');
    const category = parts.length > 1 ? parts[0] : 'general';
    const name = parts.length > 1 ? parts[1] : dto.name;

    const filePath = this.resolvePlaylistPath(category, name);
    if (!filePath) {
      throw new Error(`Invalid playlist name: ${dto.name}`);
    }

    fs.mkdirSync(path.dirname(filePath), { recursive: true });
    fs.writeFileSync(filePath, this.serialize(dto.description, dto.urls), 'utf-8');
    this.logger.log(`Created playlist: ${category}/${name}`);

    return {
      category,
      name,
      path: filePath,
      urlCount: dto.urls.length,
      urls: dto.urls,
      description: dto.description,
    };
  }

  /**
   * Deletes a playlist file.
   *
   * @throws NotFoundException when the playlist does not exist or the
   *   category/name cannot identify a file inside the playlists directory
   */
  async delete(category: string, name: string): Promise<void> {
    const filePath = this.resolvePlaylistPath(category, name);
    if (!filePath || !fs.existsSync(filePath)) {
      throw new NotFoundException(`Playlist ${category}/${name} not found`);
    }
    fs.unlinkSync(filePath);
    this.logger.log(`Deleted playlist: ${category}/${name}`);
  }

  /**
   * Appends a URL to an existing playlist and rewrites its file.
   *
   * @throws NotFoundException when the playlist does not exist
   * @throws Error when `url` is not a non-empty single line, or starts with `#`
   *   (it would be read back as a comment instead of a URL)
   */
  async addUrl(category: string, name: string, url: string): Promise<Playlist> {
    const cleaned = typeof url === 'string' ? url.trim() : '';
    if (!cleaned || cleaned.startsWith('#') || /[\r\n]/.test(cleaned)) {
      throw new Error('Playlist URL must be a non-empty single line');
    }

    const playlist = await this.getOne(category, name);
    playlist.urls.push(cleaned);
    fs.writeFileSync(playlist.path, this.serialize(playlist.description, playlist.urls), 'utf-8');
    playlist.urlCount = playlist.urls.length;
    return playlist;
  }

  /** True when `segment` is a plain file-name component that cannot change directories. */
  private static isSafeSegment(segment: unknown): segment is string {
    return (
      typeof segment === 'string' &&
      segment.length > 0 &&
      segment !== '.' &&
      segment !== '..' &&
      !/[\\\/\0]/.test(segment)
    );
  }

  /** Builds the playlist file path, or returns null when a segment could escape the playlists directory. */
  private resolvePlaylistPath(category: string, name: string): string | null {
    if (!PlaylistService.isSafeSegment(category) || !PlaylistService.isSafeSegment(name)) {
      return null;
    }
    return path.join(this.playlistsDir, category, `${name}${PlaylistService.FILE_EXTENSION}`);
  }

  /** Parses a playlist file into its description (first `# ` comment that is not the header) and URLs. */
  private readPlaylist(category: string, name: string, filePath: string): Playlist {
    const urls: string[] = [];
    let description: string | undefined;

    for (const rawLine of fs.readFileSync(filePath, 'utf-8').split('\n')) {
      const line = rawLine.trim();
      if (!line) continue;
      if (!line.startsWith('#')) {
        urls.push(line);
        continue;
      }
      // The generated header must not be mistaken for a description, otherwise
      // playlists created without one report "One URL per line" and grow a
      // duplicate header line on the next rewrite.
      if (line === PlaylistService.HEADER_COMMENT) continue;
      if (description === undefined && line.startsWith('# ')) {
        description = line.substring(2);
      }
    }

    return { category, name, path: filePath, urlCount: urls.length, urls, description };
  }

  /** Renders the on-disk representation of a playlist. */
  private serialize(description: string | undefined, urls: string[]): string {
    const descriptionLine = description ? `# ${description}\n` : '';
    return `${descriptionLine}${PlaylistService.HEADER_COMMENT}\n\n${urls.join('\n')}\n`;
  }
}

View file

@ -0,0 +1,35 @@
import { IsString, IsOptional, IsUrl, IsEnum } from 'class-validator';
/** Transcription providers a request may select. */
export enum WhisperProviderEnum {
GROQ = 'groq',
LOCAL = 'local',
}
/** Model names a request may select, grouped by the provider they belong to. */
export enum WhisperModelEnum {
// Groq models (cloud)
WHISPER_LARGE_V3_TURBO = 'whisper-large-v3-turbo',
WHISPER_LARGE_V3 = 'whisper-large-v3',
// Local models
TINY = 'tiny',
BASE = 'base',
SMALL = 'small',
MEDIUM = 'medium',
LARGE = 'large',
}
/** Request body for creating a transcription job, validated via class-validator decorators. */
export class TranscribeRequestDto {
// Source URL to transcribe; must pass the `IsUrl` check.
@IsUrl()
url: string;
// Optional language string; initialised to 'de'.
@IsString()
@IsOptional()
language?: string = 'de';
// Optional provider; must be a WhisperProviderEnum value when present.
@IsEnum(WhisperProviderEnum)
@IsOptional()
provider?: WhisperProviderEnum;
// Optional model; must be a WhisperModelEnum value when present.
@IsEnum(WhisperModelEnum)
@IsOptional()
model?: WhisperModelEnum;
}

View file

@ -0,0 +1,46 @@
/** Lifecycle states of a transcription job. */
export enum JobStatus {
PENDING = 'pending',
DOWNLOADING = 'downloading',
TRANSCRIBING = 'transcribing',
COMPLETED = 'completed',
FAILED = 'failed',
CANCELLED = 'cancelled',
}
/** Video metadata attached to a transcription job. */
export interface VideoInfo {
id: string;
title: string;
description: string;
// NOTE(review): unit is not visible here — presumably seconds; confirm against the producer.
duration: number;
channel: string;
channelId: string;
thumbnail: string;
uploadDate: string;
}
/**
 * State of a single transcription job.
 *
 * A new job starts as PENDING with progress 0 and `createdAt` set to the
 * construction time; the remaining optional fields are filled in later.
 */
export class TranscriptionJob {
id: string;
url: string;
language: string;
provider: string;
model?: string;
status: JobStatus;
// Progress value; initialised to 0.
progress: number;
createdAt: Date;
completedAt?: Date;
videoInfo?: VideoInfo;
transcriptPath?: string;
transcriptText?: string;
// Error message, if one was recorded for the job.
error?: string;
/**
 * @param id unique job identifier
 * @param url source URL to transcribe
 * @param language language string for the transcription
 * @param provider name of the transcription provider
 * @param model optional model name
 */
constructor(id: string, url: string, language: string, provider: string, model?: string) {
this.id = id;
this.url = url;
this.language = language;
this.provider = provider;
this.model = model;
this.status = JobStatus.PENDING;
this.progress = 0;
this.createdAt = new Date();
}
}

View file

@ -0,0 +1,33 @@
import { Controller, Get, Post, Delete, Param, Body } from '@nestjs/common';
import { TranscriptionService } from './transcription.service';
import { TranscribeRequestDto } from './dto/transcribe-request.dto';
/**
 * REST endpoints for transcription jobs, mounted under `transcription`.
 * Every handler delegates directly to TranscriptionService.
 */
@Controller('transcription')
export class TranscriptionController {
constructor(private readonly transcriptionService: TranscriptionService) {}
/** Creates a job from the validated request body. */
@Post()
async createJob(@Body() dto: TranscribeRequestDto) {
return this.transcriptionService.createJob(dto);
}
/** Lists all jobs known to the service. */
@Get()
async getAllJobs() {
return this.transcriptionService.getAllJobs();
}
/** Returns transcript and job statistics. */
// NOTE(review): presumably must stay declared before `:id` so that `stats` is not captured as an id — confirm.
@Get('stats')
async getStats() {
return this.transcriptionService.getStats();
}
/** Returns a single job by id. */
@Get(':id')
async getJob(@Param('id') id: string) {
return this.transcriptionService.getJob(id);
}
/** Cancels a job by id and returns it. */
@Delete(':id')
async cancelJob(@Param('id') id: string) {
return this.transcriptionService.cancelJob(id);
}
}

View file

@ -0,0 +1,14 @@
import { Module } from '@nestjs/common';
import { TranscriptionController } from './transcription.controller';
import { TranscriptionService } from './transcription.service';
import { YoutubeModule } from '../youtube/youtube.module';
import { WhisperModule } from '../whisper/whisper.module';
import { WebsocketModule } from '../websocket/websocket.module';
/**
 * Feature module for transcription jobs.
 * Imports the YouTube, Whisper and websocket modules and exports TranscriptionService.
 */
@Module({
imports: [YoutubeModule, WhisperModule, WebsocketModule],
controllers: [TranscriptionController],
providers: [TranscriptionService],
exports: [TranscriptionService],
})
export class TranscriptionModule {}

View file

@ -0,0 +1,254 @@
import { Injectable, Logger, NotFoundException } from '@nestjs/common';
import { ConfigService } from '@nestjs/config';
import { v4 as uuidv4 } from 'uuid';
import * as fs from 'fs';
import * as path from 'path';
import { YoutubeService } from '../youtube/youtube.service';
import { WhisperService, WhisperProvider, WhisperModel } from '../whisper/whisper.service';
import { ProgressGateway } from '../websocket/progress.gateway';
import { TranscriptionJob, JobStatus } from './entities/transcription-job.entity';
import { TranscribeRequestDto } from './dto/transcribe-request.dto';
/**
 * Runs transcription jobs: fetch video info, download audio, transcribe it and
 * store the transcript under `TRANSCRIPTS_DIR` (default `./data/transcripts`).
 *
 * Jobs are kept in memory only (a Map keyed by job id) and progress is pushed
 * to clients through the ProgressGateway.
 */
@Injectable()
export class TranscriptionService {
  private readonly logger = new Logger(TranscriptionService.name);
  private readonly jobs: Map<string, TranscriptionJob> = new Map();
  private readonly transcriptsDir: string;

  constructor(
    private readonly configService: ConfigService,
    private readonly youtubeService: YoutubeService,
    private readonly whisperService: WhisperService,
    private readonly progressGateway: ProgressGateway
  ) {
    this.transcriptsDir = this.configService.get<string>('TRANSCRIPTS_DIR') || './data/transcripts';
    // Ensure transcripts directory exists
    if (!fs.existsSync(this.transcriptsDir)) {
      fs.mkdirSync(this.transcriptsDir, { recursive: true });
    }
  }

  /**
   * Registers a new job and starts processing it in the background.
   *
   * @returns the freshly created job (status PENDING); it is mutated as processing advances
   */
  async createJob(dto: TranscribeRequestDto): Promise<TranscriptionJob> {
    const job = new TranscriptionJob(
      uuidv4(),
      dto.url,
      dto.language || 'de',
      // Use the provider WhisperService is configured with. The previous
      // literal 'openai' is not a valid WhisperProvider and silently routed
      // such jobs to local Whisper.
      dto.provider ?? this.whisperService.getDefaultProvider(),
      dto.model
    );
    this.jobs.set(job.id, job);

    // Start processing in background; processJob handles its own failures, the
    // catch only guards against errors thrown while reporting them.
    void this.processJob(job).catch((error: unknown) => {
      const reason = error instanceof Error ? error.message : 'Unknown error';
      this.logger.error(`Unexpected error while processing job ${job.id}: ${reason}`);
    });

    return job;
  }

  /**
   * @throws NotFoundException when no job with this id exists
   */
  async getJob(id: string): Promise<TranscriptionJob> {
    const job = this.jobs.get(id);
    if (!job) {
      throw new NotFoundException(`Job ${id} not found`);
    }
    return job;
  }

  /** Returns every job currently held in memory. */
  async getAllJobs(): Promise<TranscriptionJob[]> {
    return Array.from(this.jobs.values());
  }

  /**
   * Marks an active job as cancelled and notifies clients. Finished jobs are
   * returned unchanged.
   *
   * A step that is already running (download or transcription) is not
   * aborted; the job stops at the next checkpoint in processJob.
   *
   * @throws NotFoundException when no job with this id exists
   */
  async cancelJob(id: string): Promise<TranscriptionJob> {
    const job = await this.getJob(id);
    if (this.isActive(job)) {
      job.status = JobStatus.CANCELLED;
      job.error = 'Cancelled by user';
      this.progressGateway.broadcastJobUpdate(job.id, {
        status: job.status,
        error: job.error,
      });
    }
    return job;
  }

  /** True while the job is pending, downloading or transcribing. */
  private isActive(job: TranscriptionJob): boolean {
    return (
      job.status === JobStatus.PENDING ||
      job.status === JobStatus.DOWNLOADING ||
      job.status === JobStatus.TRANSCRIBING
    );
  }

  /** True once cancelJob has marked the job as cancelled. */
  private isCancelled(job: TranscriptionJob): boolean {
    return job.status === JobStatus.CANCELLED;
  }

  /**
   * Executes all steps of a job. Never rejects for step failures: they are
   * recorded on the job (status FAILED) and broadcast. The downloaded audio
   * file is removed in every case.
   */
  private async processJob(job: TranscriptionJob): Promise<void> {
    let audioPath: string | null = null;

    try {
      // Step 1: Get video info
      this.updateJobProgress(job, JobStatus.DOWNLOADING, 5);
      const videoInfo = await this.youtubeService.getVideoInfo(job.url);
      // Check for cancellation BEFORE reporting further progress; reporting
      // first used to overwrite the CANCELLED status and defeat the check.
      if (this.isCancelled(job)) return;
      job.videoInfo = videoInfo;
      this.updateJobProgress(job, JobStatus.DOWNLOADING, 10);
      this.logger.log(`Processing: ${videoInfo.title}`);

      // Step 2: Download audio
      audioPath = await this.youtubeService.downloadAudio(job.url, (progress) => {
        const overallProgress = 10 + progress.percent * 0.4; // 10-50%
        this.updateJobProgress(job, JobStatus.DOWNLOADING, Math.round(overallProgress));
      });
      if (this.isCancelled(job)) return;
      this.updateJobProgress(job, JobStatus.DOWNLOADING, 50);

      // Step 3: Transcribe
      this.updateJobProgress(job, JobStatus.TRANSCRIBING, 55);
      const result = await this.whisperService.transcribe(
        audioPath,
        job.language,
        job.provider as WhisperProvider,
        job.model as WhisperModel
      );
      if (this.isCancelled(job)) return;
      this.updateJobProgress(job, JobStatus.TRANSCRIBING, 90);

      // Step 4: Save transcript
      const transcriptPath = await this.saveTranscript(job, videoInfo, result.text);
      job.transcriptPath = transcriptPath;
      job.transcriptText = result.text;
      job.status = JobStatus.COMPLETED;
      job.progress = 100;
      job.completedAt = new Date();

      this.progressGateway.broadcastJobUpdate(job.id, {
        status: job.status,
        progress: job.progress,
        transcriptPath: job.transcriptPath,
      });
      this.logger.log(`Completed: ${videoInfo.title}`);
    } catch (error) {
      // A step failing after the user cancelled must not turn the job into FAILED.
      if (this.isCancelled(job)) return;

      job.status = JobStatus.FAILED;
      job.error = error instanceof Error ? error.message : 'Unknown error';
      this.progressGateway.broadcastJobUpdate(job.id, {
        status: job.status,
        error: job.error,
      });
      this.logger.error(`Job failed: ${job.error}`);
    } finally {
      // Cleanup audio file (covers success, failure and every cancellation return)
      if (audioPath) {
        await this.youtubeService.cleanupFile(audioPath);
      }
    }
  }

  /**
   * Records status/progress on the job and broadcasts it.
   * Does nothing for a cancelled job so late progress events cannot revive it.
   */
  private updateJobProgress(job: TranscriptionJob, status: JobStatus, progress: number): void {
    if (this.isCancelled(job)) return;

    job.status = status;
    job.progress = progress;
    this.progressGateway.broadcastJobUpdate(job.id, {
      status: job.status,
      progress: job.progress,
      videoInfo: job.videoInfo,
    });
  }

  /**
   * Writes the transcript to `<transcriptsDir>/<channel>/<title>_<videoId>.txt`.
   *
   * @returns path of the written file
   */
  private async saveTranscript(
    job: TranscriptionJob,
    videoInfo: { channel: string; title: string; id: string },
    text: string
  ): Promise<string> {
    // Sanitize names for filesystem. Metadata may lack a channel or title, so
    // fall back to a placeholder instead of throwing on `undefined`.
    const sanitize = (str: string) =>
      (str || 'unknown').replace(/[^a-z0-9äöüß\-_]/gi, '_').substring(0, 50);

    const channelDir = path.join(this.transcriptsDir, sanitize(videoInfo.channel));
    if (!fs.existsSync(channelDir)) {
      fs.mkdirSync(channelDir, { recursive: true });
    }

    const filename = `${sanitize(videoInfo.title)}_${videoInfo.id}.txt`;
    const filePath = path.join(channelDir, filename);

    // Header lines, separator, transcript text, trailing newline.
    const content = [
      `# ${videoInfo.title}`,
      `Channel: ${videoInfo.channel}`,
      `Video ID: ${videoInfo.id}`,
      `Language: ${job.language}`,
      `Transcribed: ${new Date().toISOString()}`,
      `Provider: ${job.provider}`,
      '---',
      text,
      '',
    ].join('\n');

    fs.writeFileSync(filePath, content, 'utf-8');
    return filePath;
  }

  /**
   * Summarises stored transcripts (count and size of `.txt` files below the
   * transcripts directory, recursively) and the in-memory jobs by state.
   */
  async getStats() {
    const jobs = Array.from(this.jobs.values());
    let totalTranscripts = 0;
    let totalSize = 0;

    if (fs.existsSync(this.transcriptsDir)) {
      const countFiles = (dir: string) => {
        const items = fs.readdirSync(dir, { withFileTypes: true });
        for (const item of items) {
          const fullPath = path.join(dir, item.name);
          if (item.isDirectory()) {
            countFiles(fullPath);
          } else if (item.name.endsWith('.txt')) {
            totalTranscripts++;
            totalSize += fs.statSync(fullPath).size;
          }
        }
      };
      countFiles(this.transcriptsDir);
    }

    return {
      totalTranscripts,
      // Megabytes rounded to two decimals.
      totalSizeMB: Math.round((totalSize / 1024 / 1024) * 100) / 100,
      activeJobs: jobs.filter((j) => this.isActive(j)).length,
      completedJobs: jobs.filter((j) => j.status === JobStatus.COMPLETED).length,
      failedJobs: jobs.filter((j) => j.status === JobStatus.FAILED).length,
    };
  }
}

View file

@ -0,0 +1,79 @@
import {
WebSocketGateway,
WebSocketServer,
OnGatewayConnection,
OnGatewayDisconnect,
} from '@nestjs/websockets';
import { Logger } from '@nestjs/common';
import { Server, Socket } from 'socket.io';
/** Fields that can accompany a job event sent to websocket clients. */
export interface JobUpdatePayload {
// Job status as a plain string.
status: string;
progress?: number;
error?: string;
// Subset of the video metadata sent along with updates.
videoInfo?: {
id: string;
title: string;
channel: string;
thumbnail: string;
};
transcriptPath?: string;
}
/**
 * Socket.IO gateway on the `/progress` namespace.
 *
 * Sends a `heartbeat` event to each connected client every 10 seconds and
 * broadcasts job events (`job_update`, `job_complete`, `job_error`) to all
 * clients of the namespace.
 */
@WebSocketGateway({
  cors: {
    origin: ['http://localhost:5173', 'http://localhost:4321', 'http://localhost:3000'],
    credentials: true,
  },
  namespace: '/progress',
})
export class ProgressGateway implements OnGatewayConnection, OnGatewayDisconnect {
  private readonly logger = new Logger(ProgressGateway.name);

  @WebSocketServer()
  server: Server;

  /** Logs the connection and starts the per-client heartbeat timer. */
  handleConnection(client: Socket) {
    this.logger.log(`Client connected: ${client.id}`);

    // Send heartbeat every 10 seconds
    const heartbeat = setInterval(() => {
      client.emit('heartbeat', { timestamp: Date.now() });
    }, 10000);

    // The timer belongs to this client only; stop it once the socket goes away.
    client.on('disconnect', () => {
      clearInterval(heartbeat);
    });
  }

  /** Logs the disconnect. */
  handleDisconnect(client: Socket) {
    this.logger.log(`Client disconnected: ${client.id}`);
  }

  /** Broadcasts a `job_update` event carrying the given payload. */
  broadcastJobUpdate(jobId: string, payload: JobUpdatePayload) {
    this.emitJobEvent('job_update', jobId, payload);
  }

  /** Broadcasts a `job_complete` event carrying the given payload. */
  broadcastJobComplete(jobId: string, payload: JobUpdatePayload) {
    this.emitJobEvent('job_complete', jobId, payload);
  }

  /** Broadcasts a `job_error` event carrying the error message. */
  broadcastJobError(jobId: string, error: string) {
    this.emitJobEvent('job_error', jobId, { error });
  }

  /**
   * Emits `event` with the envelope `{ type, jobId, ...fields, timestamp }`.
   * Field order matters: `fields` may override `type`/`jobId`, and
   * `timestamp` is always written last.
   */
  private emitJobEvent(event: string, jobId: string, fields: object) {
    this.server.emit(event, {
      type: event,
      jobId,
      ...fields,
      timestamp: Date.now(),
    });
  }
}

View file

@ -0,0 +1,9 @@
import { Module, Global } from '@nestjs/common';
import { ProgressGateway } from './progress.gateway';
/** Module marked `@Global()` that provides and exports the ProgressGateway. */
@Global()
@Module({
providers: [ProgressGateway],
exports: [ProgressGateway],
})
export class WebsocketModule {}

View file

@ -0,0 +1,17 @@
import { Controller, Get } from '@nestjs/common';
import { WhisperService } from './whisper.service';
/** Read-only endpoints describing the Whisper configuration, mounted under `whisper`. */
@Controller('whisper')
export class WhisperController {
  constructor(private readonly whisperService: WhisperService) {}

  /**
   * Reports the selectable models together with the configured defaults and
   * whether the Groq provider is available.
   */
  @Get('models')
  getModels() {
    const whisper = this.whisperService;
    const models = whisper.getAvailableModels();
    const defaultProvider = whisper.getDefaultProvider();
    const defaultModel = whisper.getDefaultModel();
    const groqAvailable = whisper.isGroqAvailable();

    return { models, defaultProvider, defaultModel, groqAvailable };
  }
}

View file

@ -0,0 +1,10 @@
import { Module } from '@nestjs/common';
import { WhisperService } from './whisper.service';
import { WhisperController } from './whisper.controller';
/** Feature module for Whisper transcription; WhisperService is exported for other modules. */
@Module({
controllers: [WhisperController],
providers: [WhisperService],
exports: [WhisperService],
})
export class WhisperModule {}

View file

@ -0,0 +1,219 @@
import { Injectable, Logger } from '@nestjs/common';
import { ConfigService } from '@nestjs/config';
import { spawn } from 'child_process';
import * as fs from 'fs';
import OpenAI from 'openai';
/** Where a transcription runs: the Groq API or a local Whisper process. */
export type WhisperProvider = 'groq' | 'local';
/** Model names used with the Groq provider. */
export type GroqWhisperModel = 'whisper-large-v3-turbo' | 'whisper-large-v3';
/** Model names used with the local provider. */
export type LocalWhisperModel = 'tiny' | 'base' | 'small' | 'medium' | 'large';
/** Any model name of either provider. */
export type WhisperModel = GroqWhisperModel | LocalWhisperModel;
/** Outcome of a transcription run. */
export interface TranscriptionResult {
// Transcribed text.
text: string;
// Language reported by the provider (the Groq path falls back to the requested one).
language: string;
// Wall-clock time the transcription took, in seconds (not the audio length).
duration: number;
// Provider that produced the result.
provider: WhisperProvider;
}
/** Descriptive entry for one selectable model, as returned by `getAvailableModels()`. */
export interface WhisperModelInfo {
name: string;
provider: WhisperProvider;
// Human-readable speed label, e.g. '~10x realtime'.
speed: string;
// Human-readable accuracy label, e.g. '95%'.
accuracy: string;
// Human-readable price label; only set for the Groq entries.
cost?: string;
}
/**
 * Transcribes audio files either through Groq's OpenAI-compatible API or by
 * running the Python `whisper` package in a child process.
 *
 * Configuration keys: `GROQ_API_KEY` (enables the Groq provider),
 * `WHISPER_PROVIDER` (default `groq`) and `WHISPER_MODEL`
 * (default `whisper-large-v3-turbo`).
 */
@Injectable()
export class WhisperService {
  private static readonly GROQ_MODELS: readonly GroqWhisperModel[] = [
    'whisper-large-v3-turbo',
    'whisper-large-v3',
  ];
  private static readonly LOCAL_MODELS: readonly LocalWhisperModel[] = [
    'tiny',
    'base',
    'small',
    'medium',
    'large',
  ];
  /** Model used when a non-Groq model name reaches the Groq provider. */
  private static readonly FALLBACK_GROQ_MODEL: GroqWhisperModel = 'whisper-large-v3-turbo';
  /**
   * Model used when a non-local model name reaches local Whisper.
   * NOTE(review): 'base' is a conservative choice — adjust if a larger default is wanted.
   */
  private static readonly FALLBACK_LOCAL_MODEL: LocalWhisperModel = 'base';
  /**
   * Python program run via `python3 -c`. Model, audio path and language are
   * read from `sys.argv` instead of being interpolated into the source, so
   * request-supplied values can never be executed as Python code.
   */
  private static readonly LOCAL_WHISPER_SCRIPT = [
    'import json',
    'import sys',
    'import whisper',
    'model_name, audio_path, language = sys.argv[1:4]',
    'model = whisper.load_model(model_name)',
    'result = model.transcribe(audio_path, language=language)',
    'print(json.dumps({"text": result["text"], "language": result.get("language", language)}))',
  ].join('\n');

  private readonly logger = new Logger(WhisperService.name);
  private readonly groqClient: OpenAI | null;
  private readonly defaultProvider: WhisperProvider;
  private readonly defaultModel: WhisperModel;

  constructor(private configService: ConfigService) {
    const groqApiKey = this.configService.get<string>('GROQ_API_KEY');
    if (groqApiKey) {
      // Groq uses OpenAI-compatible API
      this.groqClient = new OpenAI({
        apiKey: groqApiKey,
        baseURL: 'https://api.groq.com/openai/v1',
      });
      this.logger.log('Groq API configured successfully');
    } else {
      this.groqClient = null;
      this.logger.warn('Groq API key not configured. Only local Whisper available.');
    }

    this.defaultProvider =
      (this.configService.get<string>('WHISPER_PROVIDER') as WhisperProvider) || 'groq';
    this.defaultModel =
      (this.configService.get<string>('WHISPER_MODEL') as WhisperModel) || 'whisper-large-v3-turbo';
  }

  /**
   * Transcribes an audio file.
   *
   * @param audioPath path of the audio file to transcribe
   * @param language language passed to the provider (default `de`)
   * @param provider provider to use; defaults to the configured provider
   * @param model model to use; defaults to the configured model. A model that
   *   does not belong to the provider actually used is replaced by that
   *   provider's fallback model.
   * @returns transcript text, language, elapsed seconds and the provider used
   * @throws Error when the Groq request or the local Whisper process fails
   */
  async transcribe(
    audioPath: string,
    language: string = 'de',
    provider?: WhisperProvider,
    model?: WhisperModel
  ): Promise<TranscriptionResult> {
    const selectedProvider = provider || this.defaultProvider;
    const selectedModel = model || this.defaultModel;

    if (selectedProvider === 'groq' && this.groqClient) {
      return this.transcribeWithGroq(audioPath, language, this.toGroqModel(selectedModel));
    }

    // Fallback to local if Groq not available
    if (selectedProvider === 'groq') {
      this.logger.warn('Groq not configured, falling back to local Whisper');
    }
    return this.transcribeWithLocalWhisper(audioPath, language, this.toLocalModel(selectedModel));
  }

  /** Returns `model` if it is a Groq model, otherwise the Groq fallback model (with a warning). */
  private toGroqModel(model: WhisperModel): GroqWhisperModel {
    const match = WhisperService.GROQ_MODELS.find((candidate) => candidate === model);
    if (match) return match;

    const fallback = WhisperService.FALLBACK_GROQ_MODEL;
    this.logger.warn(`Model "${model}" is not a Groq model, using "${fallback}" instead`);
    return fallback;
  }

  /** Returns `model` if it is a local model, otherwise the local fallback model (with a warning). */
  private toLocalModel(model: WhisperModel): LocalWhisperModel {
    const match = WhisperService.LOCAL_MODELS.find((candidate) => candidate === model);
    if (match) return match;

    const fallback = WhisperService.FALLBACK_LOCAL_MODEL;
    this.logger.warn(`Model "${model}" is not a local Whisper model, using "${fallback}" instead`);
    return fallback;
  }

  /** Sends the audio file to Groq's transcription endpoint. */
  private async transcribeWithGroq(
    audioPath: string,
    language: string,
    model: GroqWhisperModel = 'whisper-large-v3-turbo'
  ): Promise<TranscriptionResult> {
    if (!this.groqClient) {
      throw new Error('Groq API not configured');
    }

    this.logger.log(`Transcribing with Groq Whisper API (${model}): ${audioPath}`);
    const startTime = Date.now();

    const transcription = await this.groqClient.audio.transcriptions.create({
      file: fs.createReadStream(audioPath),
      model: model,
      language,
      response_format: 'verbose_json',
    });

    const duration = (Date.now() - startTime) / 1000;
    this.logger.log(`Groq transcription completed in ${duration.toFixed(2)}s`);

    return {
      text: transcription.text,
      language: transcription.language || language,
      duration,
      provider: 'groq',
    };
  }

  /**
   * Runs the Python `whisper` package in a child process and parses the JSON
   * line it prints.
   */
  private async transcribeWithLocalWhisper(
    audioPath: string,
    language: string,
    model: WhisperModel
  ): Promise<TranscriptionResult> {
    this.logger.log(`Transcribing with local Whisper (model: ${model}): ${audioPath}`);
    const startTime = Date.now();

    return new Promise((resolve, reject) => {
      // Values travel as argv entries (no shell, no source interpolation).
      const python = spawn('python3', [
        '-c',
        WhisperService.LOCAL_WHISPER_SCRIPT,
        model,
        audioPath,
        language,
      ]);

      let stdout = '';
      let stderr = '';

      python.stdout.on('data', (data) => {
        stdout += data.toString();
      });

      python.stderr.on('data', (data) => {
        stderr += data.toString();
        // Whisper outputs progress to stderr, log it
        this.logger.debug(data.toString());
      });

      // Without this listener a missing `python3` binary raises an unhandled
      // 'error' event and takes the whole process down.
      python.on('error', (error) => {
        this.logger.error(`Local Whisper could not be started: ${error.message}`);
        reject(new Error(`Transcription failed: ${error.message}`));
      });

      python.on('close', (code) => {
        const duration = (Date.now() - startTime) / 1000;

        if (code !== 0) {
          this.logger.error(`Local Whisper error: ${stderr}`);
          reject(new Error(`Transcription failed: ${stderr}`));
          return;
        }

        try {
          // The script prints exactly one JSON line last; ignore anything a
          // library may have written to stdout before it.
          const lastLine = stdout.trim().split('\n').pop() ?? '';
          const parsed = JSON.parse(lastLine) as { text?: unknown; language?: unknown };
          if (typeof parsed.text !== 'string') {
            throw new Error('missing text');
          }
          resolve({
            text: parsed.text,
            language: typeof parsed.language === 'string' ? parsed.language : language,
            duration,
            provider: 'local',
          });
        } catch {
          reject(new Error('Failed to parse transcription result'));
        }
      });
    });
  }

  /**
   * Lists the selectable models. Groq entries are included only when a Groq
   * API key is configured; speed, accuracy and cost are descriptive labels.
   */
  getAvailableModels(): WhisperModelInfo[] {
    const models: WhisperModelInfo[] = [];

    // Groq models (cloud, ultra-fast)
    if (this.groqClient) {
      models.push(
        {
          name: 'whisper-large-v3-turbo',
          provider: 'groq',
          speed: '~300x realtime',
          accuracy: '95%',
          cost: '$0.04/hour',
        },
        {
          name: 'whisper-large-v3',
          provider: 'groq',
          speed: '~250x realtime',
          accuracy: '97%',
          cost: '$0.111/hour',
        }
      );
    }

    // Local models
    models.push(
      { name: 'tiny', provider: 'local', speed: '~10x realtime', accuracy: '75%' },
      { name: 'base', provider: 'local', speed: '~7x realtime', accuracy: '85%' },
      { name: 'small', provider: 'local', speed: '~4x realtime', accuracy: '91%' },
      { name: 'medium', provider: 'local', speed: '~2x realtime', accuracy: '94%' },
      { name: 'large', provider: 'local', speed: '~1x realtime', accuracy: '96-98%' }
    );

    return models;
  }

  /** True when a Groq API key was configured. */
  isGroqAvailable(): boolean {
    return this.groqClient !== null;
  }

  /** Provider used when a request does not name one. */
  getDefaultProvider(): WhisperProvider {
    return this.defaultProvider;
  }

  /** Model used when a request does not name one. */
  getDefaultModel(): WhisperModel {
    return this.defaultModel;
  }
}

View file

@ -0,0 +1,8 @@
import { Module } from '@nestjs/common';
import { YoutubeService } from './youtube.service';
/** Feature module that provides and exports YoutubeService. */
@Module({
providers: [YoutubeService],
exports: [YoutubeService],
})
export class YoutubeModule {}

View file

@ -0,0 +1,163 @@
import { Injectable, Logger } from '@nestjs/common';
import { ConfigService } from '@nestjs/config';
import { spawn } from 'child_process';
import * as path from 'path';
import * as fs from 'fs';
import { v4 as uuidv4 } from 'uuid';
/** Video metadata extracted from the JSON that `yt-dlp --dump-json` prints. */
export interface VideoInfo {
id: string;
title: string;
// Empty string when the metadata has no description.
description: string;
// Taken from the `duration` field as-is. NOTE(review): presumably seconds — confirm.
duration: number;
// `channel` field, falling back to `uploader`.
channel: string;
// `channel_id` field, falling back to `uploader_id`.
channelId: string;
thumbnail: string;
// Taken from the `upload_date` field as-is.
uploadDate: string;
}
/** One progress report parsed from a yt-dlp output line. */
export interface DownloadProgress {
// Download percentage parsed from the number in front of `%`.
percent: number;
// Speed token as printed, ending in `/s`.
speed: string;
// Remaining time as printed after `ETA`, in `m:ss`-like form.
eta: string;
}
/**
 * Thin wrapper around the `yt-dlp` command-line tool for reading video
 * metadata and downloading audio as MP3 into `TEMP_AUDIO_DIR`
 * (default `./temp_audio`).
 */
@Injectable()
export class YoutubeService {
  /** Matches a yt-dlp progress line: percentage, speed (`…/s`) and ETA. */
  private static readonly PROGRESS_PATTERN =
    /(\d+\.?\d*)%.*?(\d+\.?\d*\w+\/s).*?ETA\s+(\d+:\d+)/;

  private readonly logger = new Logger(YoutubeService.name);
  private readonly tempDir: string;

  constructor(private configService: ConfigService) {
    this.tempDir = this.configService.get<string>('TEMP_AUDIO_DIR') || './temp_audio';
    // Ensure temp directory exists
    if (!fs.existsSync(this.tempDir)) {
      fs.mkdirSync(this.tempDir, { recursive: true });
    }
  }

  /**
   * Fetches video metadata without downloading the media.
   *
   * @throws Error when yt-dlp cannot be started, exits non-zero, or prints
   *   output that is not valid JSON
   */
  async getVideoInfo(url: string): Promise<VideoInfo> {
    // `--` ends option parsing so the URL can never be read as a yt-dlp flag.
    const { code, stdout, stderr } = await this.runYtDlp([
      '--dump-json',
      '--no-download',
      '--',
      url,
    ]);

    if (code !== 0) {
      this.logger.error(`yt-dlp info error: ${stderr}`);
      throw new Error(`Failed to get video info: ${stderr}`);
    }

    try {
      const info = JSON.parse(stdout);
      return {
        id: info.id,
        title: info.title,
        description: info.description || '',
        duration: info.duration,
        channel: info.channel || info.uploader,
        channelId: info.channel_id || info.uploader_id,
        thumbnail: info.thumbnail,
        uploadDate: info.upload_date,
      };
    } catch {
      throw new Error('Failed to parse video info');
    }
  }

  /**
   * Downloads the audio track as MP3 into the temp directory.
   *
   * @param url media URL handed to yt-dlp
   * @param onProgress called for every progress line yt-dlp prints
   * @returns path of the downloaded file; the caller is responsible for
   *   removing it (see {@link YoutubeService.cleanupFile})
   * @throws Error when yt-dlp cannot be started, exits non-zero, or produced
   *   no output file; partial files of the failed download are removed
   */
  async downloadAudio(
    url: string,
    onProgress?: (progress: DownloadProgress) => void
  ): Promise<string> {
    const outputId = uuidv4();
    // Build the template directly; replacing '.mp3' in a joined path would
    // hit the first occurrence, which may be inside the directory name.
    const outputTemplate = path.join(this.tempDir, `${outputId}.%(ext)s`);

    const reportProgress = (chunk: string): void => {
      if (!onProgress) return;
      // A chunk can carry several progress lines; report each of them.
      for (const line of chunk.split('\n')) {
        const match = YoutubeService.PROGRESS_PATTERN.exec(line);
        if (match) {
          onProgress({
            percent: parseFloat(match[1]),
            speed: match[2],
            eta: match[3],
          });
        }
      }
    };

    try {
      const { code, stderr } = await this.runYtDlp(
        [
          '-x',
          '--audio-format',
          'mp3',
          '--audio-quality',
          '0',
          '-o',
          outputTemplate,
          '--newline',
          '--',
          url,
        ],
        reportProgress
      );

      if (code !== 0) {
        this.logger.error(`yt-dlp download error: ${stderr}`);
        throw new Error(`Download failed: ${stderr}`);
      }

      // Find the actual output file (might have different extension initially);
      // prefer the converted MP3 when several files share the id.
      const produced = fs.readdirSync(this.tempDir).filter((f) => f.startsWith(outputId));
      const outputFile = produced.find((f) => f.endsWith('.mp3')) ?? produced[0];
      if (!outputFile) {
        throw new Error('Output file not found');
      }

      const actualPath = path.join(this.tempDir, outputFile);
      this.logger.log(`Downloaded audio to: ${actualPath}`);
      return actualPath;
    } catch (error) {
      // Do not leave partial downloads behind when the download failed.
      this.removeArtifacts(outputId);
      throw error;
    }
  }

  /** Best-effort removal of a file; failures are logged, never thrown. */
  async cleanupFile(filePath: string): Promise<void> {
    try {
      if (fs.existsSync(filePath)) {
        fs.unlinkSync(filePath);
        this.logger.log(`Cleaned up: ${filePath}`);
      }
    } catch (e) {
      const reason = e instanceof Error ? e.message : String(e);
      this.logger.warn(`Failed to cleanup file: ${filePath} (${reason})`);
    }
  }

  /**
   * True when `url` starts with an optional scheme, an optional `www.` and
   * `youtube.com/` or `youtu.be/`.
   */
  isValidYoutubeUrl(url: string): boolean {
    // The former `watch?v=` and bare `youtu.be/` patterns were subsets of this one.
    return /^(https?:\/\/)?(www\.)?(youtube\.com|youtu\.be)\//.test(url);
  }

  /**
   * Runs yt-dlp and resolves with its exit code and captured output.
   * Rejects only when the process cannot be started (e.g. yt-dlp is not
   * installed); without an 'error' listener that case would crash the process.
   *
   * @param onStdout when given, receives stdout chunks and stdout is not collected
   */
  private runYtDlp(
    args: string[],
    onStdout?: (chunk: string) => void
  ): Promise<{ code: number | null; stdout: string; stderr: string }> {
    return new Promise((resolve, reject) => {
      const ytdlp = spawn('yt-dlp', args);
      let stdout = '';
      let stderr = '';

      ytdlp.stdout.on('data', (data) => {
        const chunk = data.toString();
        if (onStdout) {
          onStdout(chunk);
        } else {
          stdout += chunk;
        }
      });

      ytdlp.stderr.on('data', (data) => {
        stderr += data.toString();
      });

      ytdlp.on('error', (error) => {
        reject(new Error(`Failed to start yt-dlp: ${error.message}`));
      });

      ytdlp.on('close', (code) => {
        resolve({ code, stdout, stderr });
      });
    });
  }

  /** Removes every temp file whose name starts with the given download id (best effort). */
  private removeArtifacts(outputId: string): void {
    try {
      for (const file of fs.readdirSync(this.tempDir)) {
        if (file.startsWith(outputId)) {
          fs.unlinkSync(path.join(this.tempDir, file));
        }
      }
    } catch (e) {
      const reason = e instanceof Error ? e.message : String(e);
      this.logger.warn(`Failed to remove partial download ${outputId}: ${reason}`);
    }
  }
}