diff --git a/apps/transcriber/.gitignore b/apps/transcriber/.gitignore new file mode 100644 index 000000000..1cc1e19fb --- /dev/null +++ b/apps/transcriber/.gitignore @@ -0,0 +1,53 @@ +# Python (legacy) +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +venv/ +env/ +ENV/ + +# Data (transcripts, playlists) +data/ + +# Node +node_modules/ +.npm +dist/ +build/ +.astro/ +.svelte-kit/ +.turbo/ + +# IDE +.vscode/ +.idea/ +*.swp +*.swo +.DS_Store + +# Environment +.env +.env.local +.env.*.local + +# Logs +*.log +npm-debug.log* +yarn-debug.log* +yarn-error.log* + +# Build outputs +apps/*/dist/ +apps/*/.astro/ +apps/*/.svelte-kit/ + +# Expo +apps/mobile/.expo/ +apps/mobile/ios/ +apps/mobile/android/ + +# Legacy +legacy/venv/ +legacy/__pycache__/ diff --git a/apps/transcriber/CLAUDE.md b/apps/transcriber/CLAUDE.md new file mode 100644 index 000000000..1a11db009 --- /dev/null +++ b/apps/transcriber/CLAUDE.md @@ -0,0 +1,217 @@ +# CLAUDE.md - Transcriber + +This file provides guidance to Claude Code when working with the Transcriber project. 
+ +## Project Overview + +Transcriber is an AI-powered YouTube video transcription application with: +- YouTube video download via yt-dlp +- Ultra-fast audio transcription using Groq Whisper API (~300x realtime) +- Fallback to local Whisper for offline use +- Playlist management for batch processing +- Real-time progress updates via WebSocket +- Multi-platform support (Web, Mobile, Landing) + +## Architecture + +``` +apps/transcriber/ +├── apps/ +│ ├── backend/ # NestJS API server (port 3006) +│ ├── web/ # SvelteKit web application +│ ├── landing/ # Astro landing/content site +│ └── mobile/ # Expo React Native app +├── packages/ +│ └── shared-types/ # Shared TypeScript types +├── data/ # Transcripts & playlists (gitignored) +├── legacy/ # Original Python code (reference) +├── package.json # Root orchestrator +└── CLAUDE.md # This file +``` + +## Quick Start + +### Prerequisites +- Node.js 20+ +- pnpm 9.15.0+ +- yt-dlp installed (`brew install yt-dlp` on macOS) +- For local Whisper: Python 3 with openai-whisper package + +### Development + +```bash +# From monorepo root +pnpm install + +# Start all transcriber apps +pnpm transcriber:dev + +# Start individual apps +pnpm dev:transcriber:backend # NestJS backend (port 3006) +pnpm dev:transcriber:web # SvelteKit web (port 5173) +pnpm dev:transcriber:landing # Astro landing (port 4321) +pnpm dev:transcriber:mobile # Expo mobile + +# Start web + backend together +pnpm dev:transcriber:app +``` + +### Environment Variables + +Create `apps/transcriber/apps/backend/.env`: +```bash +PORT=3006 +WHISPER_PROVIDER=groq # groq or local +WHISPER_MODEL=whisper-large-v3-turbo # whisper-large-v3-turbo, whisper-large-v3 (groq) | tiny, base, small, medium, large (local) +GROQ_API_KEY=gsk_... 
# Required for Groq provider +TEMP_AUDIO_DIR=./temp_audio +TRANSCRIPTS_DIR=./data/transcripts +PLAYLISTS_DIR=./data/playlists +``` + +## API Endpoints + +### Transcription +| Method | Endpoint | Description | +|--------|----------|-------------| +| POST | `/transcription` | Start new transcription job | +| GET | `/transcription` | List all jobs | +| GET | `/transcription/:id` | Get job status | +| DELETE | `/transcription/:id` | Cancel job | +| GET | `/transcription/stats` | Get statistics | + +### Playlists +| Method | Endpoint | Description | +|--------|----------|-------------| +| GET | `/playlist` | List all playlists | +| GET | `/playlist/:category/:name` | Get specific playlist | +| POST | `/playlist` | Create playlist | +| DELETE | `/playlist/:category/:name` | Delete playlist | + +### Whisper +| Method | Endpoint | Description | +|--------|----------|-------------| +| GET | `/whisper/models` | Get available models | + +### Health +| Method | Endpoint | Description | +|--------|----------|-------------| +| GET | `/health` | Health check | +| GET | `/health/ready` | Readiness check | +| GET | `/health/live` | Liveness check | + +## WebSocket + +Connect to `/progress` namespace for real-time updates: + +```typescript +const socket = io('http://localhost:3006/progress'); + +socket.on('job_update', (data) => { + // { type, jobId, status, progress, videoInfo } +}); + +socket.on('job_complete', (data) => { + // { type, jobId, status, transcriptPath } +}); + +socket.on('job_error', (data) => { + // { type, jobId, error } +}); +``` + +## Whisper Configuration + +### Groq Whisper API (Recommended) +- Ultra-fast, cloud-based (~300x realtime speed) +- Cost: ~$0.04/hour (whisper-large-v3-turbo) or ~$0.111/hour (whisper-large-v3) +- No GPU required +- Models: `whisper-large-v3-turbo` (fast) or `whisper-large-v3` (accurate) +- Set `WHISPER_PROVIDER=groq` and `GROQ_API_KEY` + +### Local Whisper +- Free, runs locally +- Requires Python + openai-whisper +- GPU recommended 
for larger models +- Models: `tiny`, `base`, `small`, `medium`, `large` +- Set `WHISPER_PROVIDER=local` and `WHISPER_MODEL` + +## Technology Stack + +| Component | Technology | +|-----------|------------| +| Backend | NestJS 10, TypeScript | +| Web | SvelteKit 2, Svelte 5, Tailwind | +| Landing | Astro 4, Tailwind | +| Mobile | Expo 52, React Native, NativeWind | +| YouTube | yt-dlp (via child_process) | +| Transcription | Groq Whisper API / local Whisper | +| Real-time | Socket.io | +| State (Mobile) | Zustand | + +## Code Patterns + +### Backend Services +```typescript +@Injectable() +export class TranscriptionService { + async createJob(dto: TranscribeRequestDto): Promise<TranscriptionJob> { + // Background processing with WebSocket updates + } +} +``` + +### Web (Svelte 5 Runes) +```typescript +// Correct - Svelte 5 +let jobs = $state([]); +let activeJobs = $derived(jobs.filter(j => j.status === 'active')); + +// Wrong - Old Svelte syntax +let jobs = []; +$: activeJobs = jobs.filter(j => j.status === 'active'); +``` + +### Mobile (Zustand) +```typescript +export const useJobStore = create((set) => ({ + jobs: [], + addJob: (job) => set((state) => ({ jobs: [...state.jobs, job] })), +})); +``` + +## Legacy Python Code + +The original Python implementation is preserved in `legacy/` for reference: +- `transcriber_v4_parallel.py` - Main transcription logic +- `api_server.py` - FastAPI server (replaced by NestJS) +- `requirements.txt` - Python dependencies + +## Troubleshooting + +### yt-dlp not found +```bash +# macOS +brew install yt-dlp + +# Linux +pip install yt-dlp +``` + +### Local Whisper not working +```bash +# Install Whisper +pip install openai-whisper + +# Test +python3 -c "import whisper; print(whisper.available_models())" +``` + +### Backend can't start +```bash +# Check port 3006 +lsof -i :3006 && kill -9 $(lsof -t -i:3006) + +# Check environment +cat apps/backend/.env +``` diff --git a/apps/transcriber/README.md b/apps/transcriber/README.md new file mode 100644 index 
000000000..b8b9eb689 --- /dev/null +++ b/apps/transcriber/README.md @@ -0,0 +1,392 @@ +# 🎥 YouTube Transcriber System + +Ein vollständiges System zur automatischen Transkription, Aufbereitung und Präsentation von YouTube-Videos mit OpenAI's Whisper, FastAPI Backend und Astro.js Frontend. + +## ✨ System-Komponenten + +### 🔧 Backend (Python) +- **OpenAI Whisper** - Lokale Speech-to-Text Transkription +- **FastAPI Server** - REST API für Web-Interface +- **Parallel Processing** - Bis zu 3.3x schnellere Verarbeitung +- **Playlist Management** - Automatische Batch-Verarbeitung + +### 🌐 Frontend (Astro.js) +- **Public Website** - Aufbereitete Vorträge als Wisdom Library +- **Admin Panel** - Transkriptions-Management (localhost only) +- **Content Collections** - Strukturierte Inhalte mit Markdown +- **Responsive Design** - Optimiert für alle Geräte + +## 🏗️ Architektur + +``` +YoutubeDL/ +├── 🐍 Python Backend +│ ├── transcriber_v4_parallel.py # Parallel-Verarbeitung +│ ├── api_server.py # FastAPI REST API +│ └── playlists/ # YouTube URL-Listen +├── 🌐 Website +│ ├── src/pages/ # Public & Admin Pages +│ ├── src/content/talks/ # Aufbereitete Vorträge +│ └── src/components/admin/ # Admin-Komponenten +└── 📂 Output + └── transcripts/ # Transkribierte Texte +``` + +## 🛠 Installation + +### Voraussetzungen + +- Python 3.10+ +- FFmpeg +- macOS (optimiert für Apple Silicon M1/M2) + +### Setup + +1. **Repository klonen:** +```bash +git clone https://github.com/yourusername/youtube-transcriber.git +cd youtube-transcriber +``` + +2. **Virtual Environment erstellen:** +```bash +python3 -m venv venv +source venv/bin/activate +``` + +3. **Dependencies installieren:** +```bash +pip install -r requirements.txt +``` + +## 🚀 Schnellstart + +### Kompletter Workflow: Von YouTube zu Website + +#### 1. Speaker Content sammeln + +Erstelle eine Playlist für einen Speaker (z.B. 
Simon Sinek): + +```bash +# playlists/people/simon-sinek.txt erstellen +# Simon Sinek Videos +# Popular talks and interviews from YouTube +# Created: 2025-09-09 + +# TED Talks +# How great leaders inspire action (Start with Why) - 60M+ views +https://www.youtube.com/watch?v=u4ZoJKF_VuA + +# Why good leaders make you feel safe - 18M+ views +https://www.youtube.com/watch?v=lmyZMtPVodo +``` + +#### 2. Videos transkribieren + +```bash +# Virtual Environment aktivieren +source venv/bin/activate + +# Parallel-Verarbeitung starten (3-4x schneller) +python3 transcriber_v4_parallel.py --playlist playlists/people/simon-sinek.txt --model base --language en +``` + +#### 3. Website Content erstellen + +**a) Content Schema erweitern** (wenn neue Kategorie): +```typescript +// website/src/content/config.ts +category: z.enum([ + 'behavioral-economics', + 'psychology', + 'leadership', // Neue Kategorie hinzufügen + // ... +]), +``` + +**b) Speaker Profil erstellen**: +```bash +# website/src/pages/speakers/simon-sinek.astro +``` + +**c) Talk-Seiten erstellen**: +```bash +# Für jedes erfolgreich transkribierte Video: +# website/src/content/talks/simon-sinek-[talk-slug].md +``` + +**d) SearchableContentList aktualisieren**: +```typescript +// website/src/components/SearchableContentList.tsx +// Neue Talks zur Inhaltsliste hinzufügen +``` + +#### 4. Website starten + +```bash +cd website +npm run dev +``` + +### Einzelnes Video transkribieren + +```bash +# Mit Large-Modell (beste Qualität) +python3 transcriber_v3.py process "https://www.youtube.com/watch?v=VIDEO_ID" --model large + +# Mit Tiny-Modell (schneller Test) +python3 transcriber_v3.py process "https://www.youtube.com/watch?v=VIDEO_ID" --model tiny +``` + +### Playlists verwalten + +1. 
**Playlist erstellen:** + - Erstelle eine `.txt` Datei im `playlists/` Ordner + - Füge YouTube-URLs ein (eine pro Zeile) + +```bash +# playlists/tech/python_tutorials.txt +https://www.youtube.com/watch?v=VIDEO_ID1 +https://www.youtube.com/watch?v=VIDEO_ID2 +``` + +2. **Alle Playlists scannen:** +```bash +python3 transcriber_v3.py scan --model large +``` + +3. **Spezifische Playlist verarbeiten:** +```bash +python3 transcriber_v3.py scan --playlist tech/python_tutorials +``` + +### Quick-Script verwenden + +```bash +./quick_transcribe.sh +``` + +Bietet ein interaktives Menü zur Modell-Auswahl. + +## 📂 Projektstruktur + +``` +YoutubeDL/ +├── playlists/ # YouTube URL-Listen nach Themen +│ ├── tech/ +│ │ └── python_tutorials.txt +│ ├── people/ +│ │ └── rory-sutherland.txt +│ └── musik/ +│ └── klassik.txt +├── transcripts/ # Transkribierte Texte (automatisch organisiert) +│ ├── tech_python_tutorials/ +│ │ └── [Kanal]/ +│ │ └── [Video]_[Timestamp].txt +│ └── people_rory-sutherland/ +│ └── TED/ +├── .cache/ # Cache für bereits verarbeitete Videos +├── temp_audio/ # Temporäre Audio-Dateien +├── venv/ # Python Virtual Environment +├── transcriber.py # v1: Basis-Funktionalität +├── transcriber_v2.py # v2: Mit Rich UI +├── transcriber_v3.py # v3: Mit Playlist-Management +└── quick_transcribe.sh # Schnellzugriff-Script +``` + +## 🎯 Whisper-Modelle + +| Modell | Größe | Geschwindigkeit | Genauigkeit | Verwendung | +|--------|-------|-----------------|-------------|------------| +| **tiny** | 39 MB | ~10x Echtzeit | 75% | Schnelle Tests | +| **base** | 74 MB | ~7x Echtzeit | 85% | Guter Kompromiss | +| **small** | 244 MB | ~4x Echtzeit | 91% | Solide Qualität | +| **medium** | 769 MB | ~2x Echtzeit | 94% | Hohe Qualität | +| **large** | 1.5 GB | ~1x Echtzeit | 96-98% | Beste Qualität | + +## 📋 Befehle + +### Hauptbefehle + +```bash +# Zeige alle Playlists +python3 transcriber_v3.py list + +# Verarbeite alle neuen Videos in allen Playlists +python3 transcriber_v3.py scan + +# 
Verarbeite einzelnes Video +python3 transcriber_v3.py process "URL" + +# Mit spezifischem Modell +python3 transcriber_v3.py scan --model large + +# Andere Sprache +python3 transcriber_v3.py scan --language en +``` + +### Optionen + +- `--model {tiny,base,small,medium,large}` - Whisper-Modell auswählen +- `--language LANG` - Sprache setzen (default: de) +- `--playlist NAME` - Spezifische Playlist verarbeiten +- `--output DIR` - Ausgabe-Verzeichnis (default: transcripts) +- `--force` - Cache ignorieren und neu transkribieren + +## 🔄 Automatisierung + +### Cron-Job einrichten + +Für tägliche automatische Verarbeitung: + +```bash +# Crontab öffnen +crontab -e + +# Täglich um 3 Uhr nachts alle Playlists scannen +0 3 * * * cd /path/to/YoutubeDL && source venv/bin/activate && python3 transcriber_v3.py scan --model large +``` + +## 💡 Tipps + +1. **Organisiere nach Themen**: Erstelle Unterordner in `playlists/` für verschiedene Themen +2. **Cache nutzen**: Das System merkt sich bereits transkribierte Videos automatisch +3. **Modell-Auswahl**: + - Nutze `tiny` für schnelle Tests + - Nutze `large` für wichtige Transkriptionen +4. 
**Batch-Verarbeitung**: Füge alle URLs zur Playlist hinzu und lasse über Nacht laufen + +## 🎨 Features im Detail + +### Rich Terminal UI (v2+) +- Farbige Ausgabe mit Emojis +- Progress Bars für Download und Transkription +- Zeitschätzungen basierend auf Video-Länge +- Video-Metadaten vor Download + +### Playlist-Management (v3) +- Automatisches Scannen von URL-Listen +- Themen-basierte Organisation +- Nur neue Videos werden verarbeitet +- Batch-Verarbeitung mehrerer Playlists + +### Cache-System +- Verhindert doppelte Verarbeitung +- Speichert Metadaten zu transkribierten Videos +- `.cache/transcribed_videos.json` enthält Historie + +## 🐛 Troubleshooting + +**FFmpeg nicht gefunden:** +```bash +# macOS +brew install ffmpeg +``` + +**Whisper-Modell lädt sehr lange:** +- Beim ersten Mal wird das Modell heruntergeladen +- Large: ~1.5GB, kann 10-30 Minuten dauern + +**"Video bereits transkribiert":** +- Nutze `--force` Flag zum Überschreiben +- Oder lösche `.cache/` Ordner für kompletten Reset + +## 📈 Performance (Apple Silicon M1) + +- **Tiny**: ~10x Echtzeit (6 Min Video → 36 Sek) +- **Base**: ~7x Echtzeit (6 Min Video → 50 Sek) +- **Small**: ~4x Echtzeit (6 Min Video → 1.5 Min) +- **Large**: ~1x Echtzeit (6 Min Video → 6 Min) + +## 🔒 Datenschutz + +- Alle Verarbeitung erfolgt **lokal** auf deinem Computer +- Keine Daten werden an externe Server gesendet +- Whisper läuft komplett offline + +## 📝 Lizenz + +MIT License - Siehe LICENSE Datei + +## 🙏 Credits + +- **OpenAI Whisper** - Speech-to-Text Engine +- **yt-dlp** - YouTube Download Tool +- **Rich** - Terminal UI Library +- **FFmpeg** - Audio/Video Verarbeitung + +## 🌐 Website Integration + +Das System generiert nicht nur Transkripte, sondern auch eine vollständige Website mit den aufbereiteten Inhalten. 
+ +### Website-Features + +- **📚 Content Collections**: Strukturierte Talk-Seiten mit Markdown +- **🔍 Suchfunktion**: Volltextsuche über alle Talks +- **👤 Speaker Profile**: Übersichtsseiten für jeden Speaker +- **🏷️ Tag-System**: Kategorisierung nach Themen +- **📱 Responsive**: Optimiert für alle Geräte +- **🎨 Theming**: Verschiedene Farbschemata + +### Content-Struktur + +``` +website/src/ +├── content/ +│ ├── config.ts # Content Schema +│ └── talks/ # Aufbereitete Talk-Seiten +│ ├── simon-sinek-why-good-leaders-make-you-feel-safe.md +│ ├── simon-sinek-millennials-in-the-workplace.md +│ └── simon-sinek-love-your-work.md +├── pages/ +│ ├── speakers/ +│ │ ├── index.astro # Speaker-Übersicht +│ │ └── simon-sinek.astro # Speaker-Profile +│ └── talks/ +│ └── [slug].astro # Dynamische Talk-Seiten +└── components/ + ├── SearchableContentList.tsx # Hauptsuche + ├── ContentCard.tsx # Talk-Vorschau + └── speakers/ + ├── SpeakerHero.astro # Speaker-Header + ├── TalkGrid.astro # Talk-Grid + └── QuoteCollection.astro # Zitate-Sammlung +``` + +### Website entwickeln + +```bash +# Website Dependencies installieren +cd website +npm install + +# Entwicklungsserver starten +npm run dev + +# Website bauen für Produktion +npm run build +``` + +### Content-Erstellung Workflow + +1. **Transkription**: Videos mit Python-Backend transkribieren +2. **Content-Aufbereitung**: Markdown-Dateien mit Metadaten erstellen +3. **Speaker-Profile**: Übersichtsseiten für neue Speaker +4. **Integration**: Neue Inhalte in Suchfunktion einbinden +5. 
**Deployment**: Website bauen und deployen + +## 🚧 Roadmap + +- [x] **Parallel Processing** - 3-4x schnellere Transkription +- [x] **Website Integration** - Vollständige Content-Website +- [x] **Speaker Profiles** - Detaillierte Speaker-Übersichten +- [x] **Content Collections** - Strukturierte Talk-Aufbereitung +- [ ] **Admin Interface** - Web-UI für Transkriptions-Management +- [ ] **Speaker Diarization** - Wer spricht wann +- [ ] **Automatische Zusammenfassungen** - LLM-basierte Summaries +- [ ] **Export Formate** - SRT, VTT, JSON Export +- [ ] **YouTube Playlist Auto-Import** - Direkte Playlist-Integration + +--- + +**Entwickelt mit ❤️ für automatische Transkription** \ No newline at end of file diff --git a/apps/transcriber/apps/backend/.env.example b/apps/transcriber/apps/backend/.env.example new file mode 100644 index 000000000..b68e539f9 --- /dev/null +++ b/apps/transcriber/apps/backend/.env.example @@ -0,0 +1,14 @@ +# Server +PORT=3006 + +# Whisper Configuration +WHISPER_PROVIDER=groq # groq or local +WHISPER_MODEL=whisper-large-v3-turbo # whisper-large-v3-turbo, whisper-large-v3 (groq) | tiny, base, small, medium, large (local) + +# Groq API (required for the groq provider) +GROQ_API_KEY=gsk_your-groq-api-key + +# Directories +TEMP_AUDIO_DIR=./temp_audio +TRANSCRIPTS_DIR=./data/transcripts +PLAYLISTS_DIR=./data/playlists diff --git a/apps/transcriber/apps/backend/nest-cli.json b/apps/transcriber/apps/backend/nest-cli.json new file mode 100644 index 000000000..f9aa683b1 --- /dev/null +++ b/apps/transcriber/apps/backend/nest-cli.json @@ -0,0 +1,8 @@ +{ + "$schema": "https://json.schemastore.org/nest-cli", + "collection": "@nestjs/schematics", + "sourceRoot": "src", + "compilerOptions": { + "deleteOutDir": true + } +} diff --git a/apps/transcriber/apps/backend/package.json b/apps/transcriber/apps/backend/package.json new file mode 100644 index 000000000..0071cd797 --- /dev/null +++ b/apps/transcriber/apps/backend/package.json @@ -0,0 +1,52 @@ +{ + "name": "@transcriber/backend", + "version": "1.0.0", + "private": true, 
+ "description": "YouTube Transcriber Backend - NestJS API", + "scripts": { + "dev": "nest start --watch", + "build": "nest build", + "start": "nest start", + "start:prod": "node dist/main", + "lint": "eslint \"{src,apps,libs,test}/**/*.ts\" --fix", + "type-check": "tsc --noEmit", + "test": "jest", + "test:watch": "jest --watch", + "test:cov": "jest --coverage" + }, + "dependencies": { + "@nestjs/common": "^10.4.15", + "@nestjs/config": "^3.3.0", + "@nestjs/core": "^10.4.15", + "@nestjs/platform-express": "^10.4.15", + "@nestjs/platform-socket.io": "^10.4.15", + "@nestjs/websockets": "^10.4.15", + "@types/socket.io-client": "^3.0.0", + "class-transformer": "^0.5.1", + "class-validator": "^0.14.1", + "openai": "^4.73.1", + "reflect-metadata": "^0.2.2", + "rxjs": "^7.8.1", + "socket.io": "^4.8.1", + "uuid": "^11.0.3" + }, + "devDependencies": { + "@nestjs/cli": "^10.4.9", + "@nestjs/schematics": "^10.2.3", + "@nestjs/testing": "^10.4.15", + "@types/express": "^5.0.0", + "@types/jest": "^29.5.14", + "@types/node": "^22.10.1", + "@types/uuid": "^10.0.0", + "@typescript-eslint/eslint-plugin": "^8.17.0", + "@typescript-eslint/parser": "^8.17.0", + "eslint": "^9.16.0", + "jest": "^29.7.0", + "source-map-support": "^0.5.21", + "ts-jest": "^29.2.5", + "ts-loader": "^9.5.1", + "ts-node": "^10.9.2", + "tsconfig-paths": "^4.2.0", + "typescript": "^5.7.2" + } +} diff --git a/apps/transcriber/apps/backend/src/app.module.ts b/apps/transcriber/apps/backend/src/app.module.ts new file mode 100644 index 000000000..482683707 --- /dev/null +++ b/apps/transcriber/apps/backend/src/app.module.ts @@ -0,0 +1,24 @@ +import { Module } from '@nestjs/common'; +import { ConfigModule } from '@nestjs/config'; +import { TranscriptionModule } from './transcription/transcription.module'; +import { PlaylistModule } from './playlist/playlist.module'; +import { YoutubeModule } from './youtube/youtube.module'; +import { WhisperModule } from './whisper/whisper.module'; +import { WebsocketModule } from 
import { Controller, Get } from '@nestjs/common';

/** Returns the current time as an ISO-8601 string; every probe reports one. */
function isoNow(): string {
  return new Date().toISOString();
}

/**
 * Probe endpoints mounted under `/health`.
 *
 * All three handlers answer with fixed status literals plus the current
 * timestamp; none of them inspects any dependency of the service.
 */
@Controller('health')
export class HealthController {
  /** `GET /health` — status, timestamp, service name and version. */
  @Get()
  check() {
    const timestamp = isoNow();
    return {
      status: 'ok',
      timestamp,
      service: 'transcriber-backend',
      version: '1.0.0',
    };
  }

  /** `GET /health/ready` — readiness probe. */
  @Get('ready')
  ready() {
    const timestamp = isoNow();
    return { status: 'ready', timestamp };
  }

  /** `GET /health/live` — liveness probe. */
  @Get('live')
  live() {
    const timestamp = isoNow();
    return { status: 'alive', timestamp };
  }
}
import {
  Controller,
  Get,
  Post,
  Delete,
  Param,
  Body,
} from '@nestjs/common';
import { PlaylistService, CreatePlaylistDto } from './playlist.service';

/**
 * HTTP routes under `/playlist`.
 *
 * Every handler forwards its arguments to {@link PlaylistService} unchanged;
 * errors raised by the service propagate to the caller.
 */
@Controller('playlist')
export class PlaylistController {
  constructor(private readonly playlists: PlaylistService) {}

  /** `GET /playlist` — every playlist the service can find. */
  @Get()
  async getAll() {
    const all = await this.playlists.getAll();
    return all;
  }

  /** `GET /playlist/:category/:name` — a single playlist. */
  @Get(':category/:name')
  async getOne(
    @Param('category') category: string,
    @Param('name') name: string,
  ) {
    const playlist = await this.playlists.getOne(category, name);
    return playlist;
  }

  /** `POST /playlist` — create a playlist from the request body. */
  @Post()
  async create(@Body() dto: CreatePlaylistDto) {
    const created = await this.playlists.create(dto);
    return created;
  }

  /** `DELETE /playlist/:category/:name` — delete, then confirm with a message. */
  @Delete(':category/:name')
  async delete(
    @Param('category') category: string,
    @Param('name') name: string,
  ) {
    await this.playlists.delete(category, name);
    return { message: 'Playlist deleted' };
  }

  /** `POST /playlist/:category/:name/url` — add the body's `url` field. */
  @Post(':category/:name/url')
  async addUrl(
    @Param('category') category: string,
    @Param('name') name: string,
    @Body('url') url: string,
  ) {
    const updated = await this.playlists.addUrl(category, name, url);
    return updated;
  }
}
import {
  BadRequestException,
  Injectable,
  Logger,
  NotFoundException,
} from '@nestjs/common';
import { ConfigService } from '@nestjs/config';
import * as fs from 'fs';
import * as path from 'path';

/** A playlist file together with the entries parsed from it. */
export interface Playlist {
  /** Name of the directory below the playlists root that holds the file. */
  category: string;
  /** File name without the `.txt` extension. */
  name: string;
  /** Path of the playlist file, built from the configured playlists root. */
  path: string;
  /** Number of entries in `urls`. */
  urlCount: number;
  /** Trimmed, non-empty, non-comment lines of the file. */
  urls: string[];
  /** Text of the first `# ` comment line other than the generated header. */
  description?: string;
}

/** Payload accepted by {@link PlaylistService.create}. */
export interface CreatePlaylistDto {
  /** Either `name` (stored under the `general` category) or `category/name`. */
  name: string;
  /** Optional single-line description, written as the first comment line. */
  description?: string;
  /** Playlist entries, one per line in the resulting file. */
  urls: string[];
}

const PLAYLIST_EXTENSION = '.txt';
const DEFAULT_CATEGORY = 'general';
/** Comment line that `create` writes above the entries of every playlist. */
const FILE_HEADER = '# One URL per line';
/** Path separators and control characters, none of which may appear in a segment. */
const UNSAFE_SEGMENT_CHARS = /[\\/\u0000-\u001f]/;

/** Validated form of a {@link CreatePlaylistDto}. */
interface NormalizedCreateRequest {
  category: string;
  name: string;
  description?: string;
  urls: string[];
}

/**
 * True when `value` can be used as exactly one path component: a non-empty
 * string that is not `.`/`..` and has no separators or control characters.
 * This keeps caller-supplied categories and names inside the playlists root.
 */
function isSafeSegment(value: unknown): value is string {
  return (
    typeof value === 'string' &&
    value.length > 0 &&
    value !== '.' &&
    value !== '..' &&
    !UNSAFE_SEGMENT_CHARS.test(value)
  );
}

/** True when `value` is a string without line breaks. */
function isSingleLine(value: unknown): value is string {
  return typeof value === 'string' && !/[\r\n]/.test(value);
}

/**
 * True when `value` would survive a round trip through a playlist file: a
 * single, non-blank line that would not be read back as a comment.
 */
function isPlaylistEntry(value: unknown): value is string {
  if (!isSingleLine(value)) {
    return false;
  }
  const trimmed = value.trim();
  return trimmed !== '' && !trimmed.startsWith('#');
}

/**
 * Splits playlist file content into its description and entries.
 *
 * Blank lines and `#` comments are not entries. The description is the first
 * `# ` comment that is not the generated header line, so a playlist created
 * without a description does not report the header as its description.
 */
function parsePlaylistContent(content: string): {
  description?: string;
  urls: string[];
} {
  let description: string | undefined;
  const urls: string[] = [];

  for (const line of content.split('\n')) {
    const trimmed = line.trim();
    if (trimmed === '') {
      continue;
    }
    if (trimmed.startsWith('#')) {
      if (
        description === undefined &&
        trimmed.startsWith('# ') &&
        trimmed !== FILE_HEADER
      ) {
        description = trimmed.substring(2);
      }
      continue;
    }
    urls.push(trimmed);
  }

  return { description, urls };
}

/**
 * Validates a create payload by hand. `CreatePlaylistDto` is an interface, so
 * it carries no class-validator metadata for the global ValidationPipe.
 *
 * @throws BadRequestException when any field is missing or malformed.
 */
function normalizeCreateRequest(dto: CreatePlaylistDto): NormalizedCreateRequest {
  const {
    name: rawName,
    description: rawDescription,
    urls: rawUrls,
  }: Partial<Record<keyof CreatePlaylistDto, unknown>> = dto ?? {};

  if (typeof rawName !== 'string') {
    throw new BadRequestException('name must be a string');
  }

  // "category/name" or plain "name"; any further separator is rejected by the
  // segment check below instead of being silently dropped.
  const slash = rawName.indexOf('/');
  const category = slash === -1 ? DEFAULT_CATEGORY : rawName.slice(0, slash);
  const name = slash === -1 ? rawName : rawName.slice(slash + 1);
  if (!isSafeSegment(category) || !isSafeSegment(name)) {
    throw new BadRequestException(
      'name must be "name" or "category/name" without further path separators or dot segments',
    );
  }

  if (rawDescription !== undefined && !isSingleLine(rawDescription)) {
    throw new BadRequestException('description must be a single-line string');
  }
  const description = rawDescription?.trim() || undefined;

  if (!Array.isArray(rawUrls)) {
    throw new BadRequestException('urls must be an array of strings');
  }
  const candidates: unknown[] = rawUrls;
  const urls: string[] = [];
  for (const candidate of candidates) {
    if (!isPlaylistEntry(candidate)) {
      throw new BadRequestException(
        'each url must be a non-empty single line that does not start with "#"',
      );
    }
    urls.push(candidate.trim());
  }

  return { category, name, description, urls };
}

/**
 * File-backed playlist store.
 *
 * Playlists live at `<PLAYLISTS_DIR>/<category>/<name>.txt`, one entry per
 * line; lines starting with `#` are comments. Categories and names supplied
 * by callers are validated before they are turned into a path.
 */
@Injectable()
export class PlaylistService {
  private readonly logger = new Logger(PlaylistService.name);
  private readonly playlistsDir: string;

  constructor(private readonly configService: ConfigService) {
    // `||` on purpose: an empty PLAYLISTS_DIR falls back to the default too.
    this.playlistsDir =
      this.configService.get<string>('PLAYLISTS_DIR') || './data/playlists';

    // Ensure playlists directory exists
    if (!fs.existsSync(this.playlistsDir)) {
      fs.mkdirSync(this.playlistsDir, { recursive: true });
    }
  }

  /**
   * Lists every `*.txt` playlist in every category directory.
   *
   * @returns All playlists found, or an empty array when the root is missing.
   */
  async getAll(): Promise<Playlist[]> {
    const playlists: Playlist[] = [];

    if (!fs.existsSync(this.playlistsDir)) {
      return playlists;
    }

    const categories = fs
      .readdirSync(this.playlistsDir, { withFileTypes: true })
      .filter((entry) => entry.isDirectory());

    for (const category of categories) {
      const categoryPath = path.join(this.playlistsDir, category.name);
      const files = fs
        .readdirSync(categoryPath)
        .filter((file) => file.endsWith(PLAYLIST_EXTENSION));

      for (const file of files) {
        // Strip the extension as a suffix only, leaving any earlier ".txt" in
        // the name intact.
        const name = file.slice(0, -PLAYLIST_EXTENSION.length);
        playlists.push(
          this.readPlaylist(category.name, name, path.join(categoryPath, file)),
        );
      }
    }

    return playlists;
  }

  /**
   * Reads one playlist.
   *
   * @throws NotFoundException when the playlist does not exist or the
   *   category/name cannot name a playlist.
   */
  async getOne(category: string, name: string): Promise<Playlist> {
    const filePath = this.requireExistingFile(category, name);
    return this.readPlaylist(category, name, filePath);
  }

  /**
   * Creates a playlist file, overwriting any existing one with the same name.
   *
   * @throws BadRequestException when the payload is malformed.
   */
  async create(dto: CreatePlaylistDto): Promise<Playlist> {
    const { category, name, description, urls } = normalizeCreateRequest(dto);

    const filePath = this.filePathFor(category, name);
    fs.mkdirSync(path.dirname(filePath), { recursive: true });

    const descriptionLine = description ? `# ${description}\n` : '';
    const content = `${descriptionLine}${FILE_HEADER}\n\n${urls.join('\n')}\n`;
    fs.writeFileSync(filePath, content, 'utf-8');

    this.logger.log(`Created playlist: ${category}/${name}`);

    return {
      category,
      name,
      path: filePath,
      urlCount: urls.length,
      urls,
      description,
    };
  }

  /**
   * Deletes a playlist file.
   *
   * @throws NotFoundException when the playlist does not exist.
   */
  async delete(category: string, name: string): Promise<void> {
    const filePath = this.requireExistingFile(category, name);

    fs.unlinkSync(filePath);
    this.logger.log(`Deleted playlist: ${category}/${name}`);
  }

  /**
   * Appends one entry to an existing playlist.
   *
   * The entry is appended to the file rather than regenerating it, so comment
   * lines already in the file are preserved.
   *
   * @throws BadRequestException when `url` is not a usable single-line entry.
   * @throws NotFoundException when the playlist does not exist.
   */
  async addUrl(category: string, name: string, url: string): Promise<Playlist> {
    if (!isPlaylistEntry(url)) {
      throw new BadRequestException(
        'url must be a non-empty single line that does not start with "#"',
      );
    }
    const entry = url.trim();
    const filePath = this.requireExistingFile(category, name);

    const existing = fs.readFileSync(filePath, 'utf-8');
    // Start on a fresh line if the file does not already end with a newline.
    const lineBreak = existing.length > 0 && !existing.endsWith('\n') ? '\n' : '';
    fs.appendFileSync(filePath, `${lineBreak}${entry}\n`, 'utf-8');

    const { description, urls } = parsePlaylistContent(existing);
    urls.push(entry);

    return {
      category,
      name,
      path: filePath,
      urlCount: urls.length,
      urls,
      description,
    };
  }

  /** Builds the file path for segments that have already been validated. */
  private filePathFor(category: string, name: string): string {
    return path.join(
      this.playlistsDir,
      category,
      `${name}${PLAYLIST_EXTENSION}`,
    );
  }

  /**
   * Returns the path of an existing playlist file. Unsafe segments are
   * reported as "not found", since no playlist can be stored under them.
   */
  private requireExistingFile(category: string, name: string): string {
    if (isSafeSegment(category) && isSafeSegment(name)) {
      const filePath = this.filePathFor(category, name);
      if (fs.existsSync(filePath)) {
        return filePath;
      }
    }
    throw new NotFoundException(`Playlist ${category}/${name} not found`);
  }

  /** Reads and parses the playlist file at `filePath`. */
  private readPlaylist(
    category: string,
    name: string,
    filePath: string,
  ): Playlist {
    const { description, urls } = parsePlaylistContent(
      fs.readFileSync(filePath, 'utf-8'),
    );
    return {
      category,
      name,
      path: filePath,
      urlCount: urls.length,
      urls,
      description,
    };
  }
}
/**
 * Mutable record describing one transcription job.
 *
 * The constructor stores the request parameters and starts the job as
 * `JobStatus.PENDING` with `progress` 0 and `createdAt` set to the current
 * time. The remaining optional fields are left unset by this class.
 */
export class TranscriptionJob {
  /** Current lifecycle state; starts as `JobStatus.PENDING`. */
  status: JobStatus;
  /** Starts at 0. NOTE(review): presumably a percentage — confirm the scale with the code that updates it. */
  progress: number;
  /** Time at which this object was constructed. */
  createdAt: Date;
  completedAt?: Date;
  videoInfo?: VideoInfo;
  transcriptPath?: string;
  transcriptText?: string;
  error?: string;

  // Parameter properties assign id, url, language, provider and model (in
  // that order) before the constructor body runs, matching the order in
  // which the explicit assignments used to happen.
  constructor(
    public id: string,
    public url: string,
    public language: string,
    public provider: string,
    public model?: string,
  ) {
    this.status = JobStatus.PENDING;
    this.progress = 0;
    this.createdAt = new Date();
  }
}
/**
 * Orchestrates transcription jobs: fetch video info, download audio,
 * transcribe it and write the transcript to disk, broadcasting progress
 * through the {@link ProgressGateway} along the way.
 *
 * Jobs are kept in an in-memory map only; they are lost on restart.
 */
@Injectable()
export class TranscriptionService {
  private readonly logger = new Logger(TranscriptionService.name);
  private readonly jobs: Map<string, TranscriptionJob> = new Map();
  private readonly transcriptsDir: string;

  constructor(
    private readonly configService: ConfigService,
    private readonly youtubeService: YoutubeService,
    private readonly whisperService: WhisperService,
    private readonly progressGateway: ProgressGateway,
  ) {
    this.transcriptsDir =
      this.configService.get<string>('TRANSCRIPTS_DIR') || './data/transcripts';

    // Ensure transcripts directory exists
    if (!fs.existsSync(this.transcriptsDir)) {
      fs.mkdirSync(this.transcriptsDir, { recursive: true });
    }
  }

  /**
   * Registers a new job and starts processing it in the background.
   *
   * @param dto Validated request; `language` falls back to `'de'` and
   *   `provider` to the Whisper service's configured default.
   * @returns The freshly created job (still pending/downloading).
   */
  async createJob(dto: TranscribeRequestDto): Promise<TranscriptionJob> {
    const jobId = uuidv4();
    const job = new TranscriptionJob(
      jobId,
      dto.url,
      dto.language || 'de',
      // Previously defaulted to 'openai', which is not a provider the
      // Whisper service knows; use the configured default instead.
      dto.provider || this.whisperService.getDefaultProvider(),
      dto.model,
    );

    this.jobs.set(jobId, job);

    // Fire-and-forget: processJob handles its own errors, `void` marks the
    // un-awaited promise as intentional.
    void this.processJob(job);

    return job;
  }

  /**
   * @throws NotFoundException when no job with `id` exists.
   */
  async getJob(id: string): Promise<TranscriptionJob> {
    const job = this.jobs.get(id);
    if (!job) {
      throw new NotFoundException(`Job ${id} not found`);
    }
    return job;
  }

  /** Returns every job currently held in memory. */
  async getAllJobs(): Promise<TranscriptionJob[]> {
    return Array.from(this.jobs.values());
  }

  /**
   * Marks an active job as cancelled and notifies clients. Jobs that already
   * finished (completed/failed/cancelled) are returned unchanged.
   *
   * Cancellation is cooperative: a step that is already running is not
   * interrupted, but no further step starts and no further progress update
   * is applied.
   *
   * @throws NotFoundException when no job with `id` exists.
   */
  async cancelJob(id: string): Promise<TranscriptionJob> {
    const job = this.jobs.get(id);
    if (!job) {
      throw new NotFoundException(`Job ${id} not found`);
    }

    if (
      job.status === JobStatus.PENDING ||
      job.status === JobStatus.DOWNLOADING ||
      job.status === JobStatus.TRANSCRIBING
    ) {
      job.status = JobStatus.CANCELLED;
      job.error = 'Cancelled by user';

      this.progressGateway.broadcastJobUpdate(job.id, {
        status: job.status,
        error: job.error,
      });
    }

    return job;
  }

  /**
   * Runs the full pipeline for one job. Never rejects: failures are recorded
   * on the job and broadcast; the temporary audio file is always removed.
   */
  private async processJob(job: TranscriptionJob): Promise<void> {
    let audioPath: string | null = null;
    const jobId = job.id;

    // Re-reads from the map so a concurrent cancelJob() is observed.
    const isCancelled = (): boolean =>
      this.jobs.get(jobId)?.status === JobStatus.CANCELLED;

    try {
      // Step 1: Get video info
      this.updateJobProgress(job, JobStatus.DOWNLOADING, 5);

      const videoInfo = await this.youtubeService.getVideoInfo(job.url);
      job.videoInfo = videoInfo;
      this.updateJobProgress(job, JobStatus.DOWNLOADING, 10);

      this.logger.log(`Processing: ${videoInfo.title}`);

      if (isCancelled()) return;

      // Step 2: Download audio
      audioPath = await this.youtubeService.downloadAudio(job.url, (progress) => {
        const overallProgress = 10 + progress.percent * 0.4; // 10-50%
        this.updateJobProgress(job, JobStatus.DOWNLOADING, Math.round(overallProgress));
      });

      this.updateJobProgress(job, JobStatus.DOWNLOADING, 50);

      // Cleanup of audioPath happens in `finally`.
      if (isCancelled()) return;

      // Step 3: Transcribe
      this.updateJobProgress(job, JobStatus.TRANSCRIBING, 55);

      const result = await this.whisperService.transcribe(
        audioPath,
        job.language,
        job.provider as WhisperProvider,
        job.model as WhisperModel,
      );

      this.updateJobProgress(job, JobStatus.TRANSCRIBING, 90);

      if (isCancelled()) return;

      // Step 4: Save transcript
      const transcriptPath = await this.saveTranscript(job, videoInfo, result.text);

      job.transcriptPath = transcriptPath;
      job.transcriptText = result.text;
      job.status = JobStatus.COMPLETED;
      job.progress = 100;
      job.completedAt = new Date();

      this.progressGateway.broadcastJobUpdate(job.id, {
        status: job.status,
        progress: job.progress,
        transcriptPath: job.transcriptPath,
      });

      this.logger.log(`Completed: ${videoInfo.title}`);
    } catch (error) {
      const message = error instanceof Error ? error.message : 'Unknown error';

      // A job the user cancelled must stay cancelled even if the step that
      // was in flight fails afterwards.
      if (isCancelled()) {
        this.logger.warn(`Cancelled job ${jobId} ended with error: ${message}`);
        return;
      }

      job.status = JobStatus.FAILED;
      job.error = message;

      this.progressGateway.broadcastJobUpdate(job.id, {
        status: job.status,
        error: job.error,
      });

      this.logger.error(`Job failed: ${job.error}`);
    } finally {
      // Cleanup audio file (runs on success, failure and cancellation)
      if (audioPath) {
        await this.youtubeService.cleanupFile(audioPath);
      }
    }
  }

  /**
   * Applies a status/progress change and broadcasts it.
   *
   * Updates for a cancelled job are dropped; otherwise a late progress
   * callback would overwrite the CANCELLED status and the pipeline would
   * carry on as if nothing happened.
   */
  private updateJobProgress(
    job: TranscriptionJob,
    status: JobStatus,
    progress: number,
  ): void {
    if (job.status === JobStatus.CANCELLED) {
      return;
    }

    job.status = status;
    job.progress = progress;

    this.progressGateway.broadcastJobUpdate(job.id, {
      status: job.status,
      progress: job.progress,
      videoInfo: job.videoInfo,
    });
  }

  /**
   * Writes the transcript to `<transcriptsDir>/<channel>/<title>_<id>.txt`
   * with a small metadata header.
   *
   * @returns Path of the written file.
   */
  private async saveTranscript(
    job: TranscriptionJob,
    videoInfo: { channel: string; title: string; id: string },
    text: string,
  ): Promise<string> {
    // Sanitize names for filesystem: anything outside the allow-list
    // (including '.', '/' and '\') becomes '_', capped at 50 characters.
    const sanitize = (str: string) =>
      str.replace(/[^a-z0-9äöüß\-_]/gi, '_').substring(0, 50);

    // Metadata comes from an external tool; a missing channel must not
    // crash the job or drop the file into the root directory.
    const channelDir = path.join(
      this.transcriptsDir,
      sanitize(videoInfo.channel || 'unknown_channel'),
    );

    if (!fs.existsSync(channelDir)) {
      fs.mkdirSync(channelDir, { recursive: true });
    }

    // The id is sanitized too so it can never contain path separators.
    const filename = `${sanitize(videoInfo.title)}_${sanitize(videoInfo.id)}.txt`;
    const filePath = path.join(channelDir, filename);

    const content = `# ${videoInfo.title}
Channel: ${videoInfo.channel}
Video ID: ${videoInfo.id}
Language: ${job.language}
Transcribed: ${new Date().toISOString()}
Provider: ${job.provider}

---

${text}
`;

    fs.writeFileSync(filePath, content, 'utf-8');

    return filePath;
  }

  /**
   * Aggregates on-disk transcript statistics (count and total size of
   * `.txt` files under the transcripts directory) and in-memory job counts.
   */
  async getStats() {
    const jobs = Array.from(this.jobs.values());

    let totalTranscripts = 0;
    let totalSize = 0;

    if (fs.existsSync(this.transcriptsDir)) {
      const countFiles = (dir: string): void => {
        const items = fs.readdirSync(dir, { withFileTypes: true });
        for (const item of items) {
          const fullPath = path.join(dir, item.name);
          if (item.isDirectory()) {
            countFiles(fullPath);
          } else if (item.name.endsWith('.txt')) {
            totalTranscripts++;
            totalSize += fs.statSync(fullPath).size;
          }
        }
      };
      countFiles(this.transcriptsDir);
    }

    return {
      totalTranscripts,
      // Rounded to two decimals.
      totalSizeMB: Math.round((totalSize / 1024 / 1024) * 100) / 100,
      activeJobs: jobs.filter(
        (j) =>
          j.status === JobStatus.PENDING ||
          j.status === JobStatus.DOWNLOADING ||
          j.status === JobStatus.TRANSCRIBING,
      ).length,
      completedJobs: jobs.filter((j) => j.status === JobStatus.COMPLETED).length,
      failedJobs: jobs.filter((j) => j.status === JobStatus.FAILED).length,
    };
  }
}
/** Read-only HTTP endpoints describing the available Whisper backends. */
@Controller('whisper')
export class WhisperController {
  constructor(private readonly whisperService: WhisperService) {}

  /**
   * GET /whisper/models
   *
   * @returns The model catalogue, the service's default provider and model,
   *   and whether the Groq client is available.
   */
  @Get('models')
  getModels() {
    const whisper = this.whisperService;

    // Queried in the same order as the response keys.
    const models = whisper.getAvailableModels();
    const defaultProvider = whisper.getDefaultProvider();
    const defaultModel = whisper.getDefaultModel();
    const groqAvailable = whisper.isGroqAvailable();

    return { models, defaultProvider, defaultModel, groqAvailable };
  }
}
000000000..77dabba75 --- /dev/null +++ b/apps/transcriber/apps/backend/src/whisper/whisper.module.ts @@ -0,0 +1,10 @@ +import { Module } from '@nestjs/common'; +import { WhisperService } from './whisper.service'; +import { WhisperController } from './whisper.controller'; + +@Module({ + controllers: [WhisperController], + providers: [WhisperService], + exports: [WhisperService], +}) +export class WhisperModule {} diff --git a/apps/transcriber/apps/backend/src/whisper/whisper.service.ts b/apps/transcriber/apps/backend/src/whisper/whisper.service.ts new file mode 100644 index 000000000..15ec65262 --- /dev/null +++ b/apps/transcriber/apps/backend/src/whisper/whisper.service.ts @@ -0,0 +1,235 @@ +import { Injectable, Logger } from '@nestjs/common'; +import { ConfigService } from '@nestjs/config'; +import { spawn } from 'child_process'; +import * as fs from 'fs'; +import OpenAI from 'openai'; + +export type WhisperProvider = 'groq' | 'local'; +export type GroqWhisperModel = 'whisper-large-v3-turbo' | 'whisper-large-v3'; +export type LocalWhisperModel = 'tiny' | 'base' | 'small' | 'medium' | 'large'; +export type WhisperModel = GroqWhisperModel | LocalWhisperModel; + +export interface TranscriptionResult { + text: string; + language: string; + duration: number; + provider: WhisperProvider; +} + +export interface WhisperModelInfo { + name: string; + provider: WhisperProvider; + speed: string; + accuracy: string; + cost?: string; +} + +@Injectable() +export class WhisperService { + private readonly logger = new Logger(WhisperService.name); + private readonly groqClient: OpenAI | null; + private readonly defaultProvider: WhisperProvider; + private readonly defaultModel: WhisperModel; + + constructor(private configService: ConfigService) { + const groqApiKey = this.configService.get('GROQ_API_KEY'); + + if (groqApiKey) { + // Groq uses OpenAI-compatible API + this.groqClient = new OpenAI({ + apiKey: groqApiKey, + baseURL: 'https://api.groq.com/openai/v1', + }); + 
/**
 * Transcribes an audio file with the requested (or default) provider.
 *
 * If Groq is requested but no Groq client is configured, the call falls
 * back to local Whisper. The model is then reconciled with the backend that
 * actually runs: a Groq model name is never handed to local Whisper (and
 * vice versa), because neither backend understands the other's names.
 *
 * @param audioPath Path of the audio file to transcribe.
 * @param language Language code, defaults to `'de'`.
 * @param provider Optional provider override.
 * @param model Optional model override.
 * @returns The transcription result of whichever backend ran.
 */
async transcribe(
  audioPath: string,
  language: string = 'de',
  provider?: WhisperProvider,
  model?: WhisperModel,
): Promise<TranscriptionResult> {
  const isGroqModel = (candidate: WhisperModel): candidate is GroqWhisperModel =>
    candidate === 'whisper-large-v3-turbo' || candidate === 'whisper-large-v3';

  // NOTE(review): fallback models used only when the requested/default model
  // does not belong to the backend that runs; adjust if a different local
  // default is preferred.
  const fallbackGroqModel: GroqWhisperModel = 'whisper-large-v3-turbo';
  const fallbackLocalModel: LocalWhisperModel = 'base';

  const selectedProvider = provider || this.defaultProvider;
  const selectedModel = model || this.defaultModel;

  if (selectedProvider === 'groq' && this.groqClient) {
    if (!isGroqModel(selectedModel)) {
      this.logger.warn(
        `Model "${selectedModel}" is not a Groq model, using "${fallbackGroqModel}"`,
      );
      return this.transcribeWithGroq(audioPath, language, fallbackGroqModel);
    }
    return this.transcribeWithGroq(audioPath, language, selectedModel);
  }

  // Fallback to local if Groq not available
  if (selectedProvider === 'groq') {
    this.logger.warn('Groq not configured, falling back to local Whisper');
  }

  if (isGroqModel(selectedModel)) {
    this.logger.warn(
      `Model "${selectedModel}" is not a local Whisper model, using "${fallbackLocalModel}"`,
    );
    return this.transcribeWithLocalWhisper(audioPath, language, fallbackLocalModel);
  }

  return this.transcribeWithLocalWhisper(audioPath, language, selectedModel);
}
/**
 * Transcribes an audio file by running the `whisper` Python package in a
 * `python3` child process.
 *
 * The model name, audio path and language are passed as process arguments
 * and read from `sys.argv` inside the script. They are never interpolated
 * into the Python source, so a crafted value (the language comes straight
 * from the request body) cannot inject code.
 *
 * @param audioPath Path of the audio file to transcribe.
 * @param language Language code forwarded to Whisper.
 * @param model Whisper model name to load.
 * @returns Resolves with the parsed transcription; rejects if `python3`
 *   cannot be started, exits non-zero, or prints unparsable output.
 */
private async transcribeWithLocalWhisper(
  audioPath: string,
  language: string,
  model: WhisperModel,
): Promise<TranscriptionResult> {
  this.logger.log(
    `Transcribing with local Whisper (model: ${model}): ${audioPath}`,
  );

  const startTime = Date.now();

  return new Promise<TranscriptionResult>((resolve, reject) => {
    // Static script: all dynamic values arrive via sys.argv.
    const pythonScript = `
import json
import sys

import whisper

model_name, audio_path, language = sys.argv[1:4]
model = whisper.load_model(model_name)
result = model.transcribe(audio_path, language=language)
print(json.dumps({"text": result["text"], "language": result.get("language", language)}))
`.trim();

    // With `python3 -c <script> a b c`, sys.argv is ['-c', 'a', 'b', 'c'].
    const python = spawn('python3', ['-c', pythonScript, model, audioPath, language]);

    let stdout = '';
    let stderr = '';

    // Without an 'error' listener a missing python3 binary would raise an
    // unhandled 'error' event and take the whole process down.
    python.on('error', (err) => {
      this.logger.error(`Failed to start local Whisper: ${err.message}`);
      reject(new Error(`Transcription failed: ${err.message}`));
    });

    python.stdout.on('data', (data) => {
      stdout += data.toString();
    });

    python.stderr.on('data', (data) => {
      stderr += data.toString();
      // Whisper outputs progress to stderr, log it
      this.logger.debug(data.toString());
    });

    python.on('close', (code) => {
      const duration = (Date.now() - startTime) / 1000;

      if (code !== 0) {
        this.logger.error(`Local Whisper error: ${stderr}`);
        reject(new Error(`Transcription failed: ${stderr}`));
        return;
      }

      try {
        const result = JSON.parse(stdout.trim());
        resolve({
          text: result.text,
          language: result.language,
          duration,
          provider: 'local',
        });
      } catch {
        reject(new Error('Failed to parse transcription result'));
      }
    });
  });
}
/**
 * Thin wrapper around the `yt-dlp` command line tool: metadata lookup,
 * audio download into a temp directory, and temp-file cleanup.
 */
@Injectable()
export class YoutubeService {
  private readonly logger = new Logger(YoutubeService.name);
  private readonly tempDir: string;

  constructor(private configService: ConfigService) {
    this.tempDir =
      this.configService.get<string>('TEMP_AUDIO_DIR') || './temp_audio';

    // Ensure temp directory exists
    if (!fs.existsSync(this.tempDir)) {
      fs.mkdirSync(this.tempDir, { recursive: true });
    }
  }

  /**
   * Fetches video metadata via `yt-dlp --dump-json` without downloading.
   *
   * @param url Video URL.
   * @returns Resolves with the mapped metadata; rejects if yt-dlp cannot be
   *   started, exits non-zero, or prints unparsable JSON.
   */
  async getVideoInfo(url: string): Promise<VideoInfo> {
    return new Promise<VideoInfo>((resolve, reject) => {
      // `--` ends option parsing so the URL can never be read as a flag.
      const ytdlp = spawn('yt-dlp', ['--dump-json', '--no-download', '--', url]);

      let stdout = '';
      let stderr = '';

      // Without this listener a missing yt-dlp binary raises an unhandled
      // 'error' event and crashes the process.
      ytdlp.on('error', (err) => {
        this.logger.error(`yt-dlp info error: ${err.message}`);
        reject(new Error(`Failed to get video info: ${err.message}`));
      });

      ytdlp.stdout.on('data', (data) => {
        stdout += data.toString();
      });

      ytdlp.stderr.on('data', (data) => {
        stderr += data.toString();
      });

      ytdlp.on('close', (code) => {
        if (code !== 0) {
          this.logger.error(`yt-dlp info error: ${stderr}`);
          reject(new Error(`Failed to get video info: ${stderr}`));
          return;
        }

        try {
          const info = JSON.parse(stdout);
          resolve({
            id: info.id,
            title: info.title,
            description: info.description || '',
            duration: info.duration,
            channel: info.channel || info.uploader,
            channelId: info.channel_id || info.uploader_id,
            thumbnail: info.thumbnail,
            uploadDate: info.upload_date,
          });
        } catch {
          reject(new Error('Failed to parse video info'));
        }
      });
    });
  }

  /**
   * Downloads the audio track of a video as MP3 into the temp directory.
   *
   * @param url Video URL.
   * @param onProgress Optional callback invoked for every progress line
   *   yt-dlp prints.
   * @returns Resolves with the path of the downloaded file; rejects if
   *   yt-dlp cannot be started, exits non-zero, or no output file is found.
   */
  async downloadAudio(
    url: string,
    onProgress?: (progress: DownloadProgress) => void,
  ): Promise<string> {
    const outputId = uuidv4();
    // Build the template directly instead of string-replacing '.mp3', which
    // would hit the wrong spot if the temp dir path itself contained '.mp3'.
    const outputTemplate = path.join(this.tempDir, `${outputId}.%(ext)s`);
    const progressPattern = /(\d+\.?\d*)%.*?(\d+\.?\d*\w+\/s).*?ETA\s+(\d+:\d+)/;

    return new Promise<string>((resolve, reject) => {
      const ytdlp = spawn('yt-dlp', [
        '-x',
        '--audio-format',
        'mp3',
        '--audio-quality',
        '0',
        '-o',
        outputTemplate,
        '--newline',
        '--',
        url,
      ]);

      let stderr = '';
      let pending = '';

      ytdlp.on('error', (err) => {
        this.logger.error(`yt-dlp download error: ${err.message}`);
        reject(new Error(`Download failed: ${err.message}`));
      });

      ytdlp.stdout.on('data', (data) => {
        if (!onProgress) {
          return;
        }

        // A chunk may hold several lines or end mid-line: buffer the
        // trailing partial line and parse every complete one.
        pending += data.toString();
        const lines = pending.split(/\r?\n/);
        pending = lines.pop() ?? '';

        for (const line of lines) {
          const progressMatch = line.match(progressPattern);
          if (progressMatch) {
            onProgress({
              percent: parseFloat(progressMatch[1]),
              speed: progressMatch[2],
              eta: progressMatch[3],
            });
          }
        }
      });

      ytdlp.stderr.on('data', (data) => {
        stderr += data.toString();
      });

      ytdlp.on('close', (code) => {
        if (code !== 0) {
          this.logger.error(`yt-dlp download error: ${stderr}`);
          reject(new Error(`Download failed: ${stderr}`));
          return;
        }

        // Find the actual output file (might have different extension initially)
        const files = fs.readdirSync(this.tempDir);
        const outputFile = files.find((f) => f.startsWith(outputId));

        if (!outputFile) {
          reject(new Error('Output file not found'));
          return;
        }

        const actualPath = path.join(this.tempDir, outputFile);
        this.logger.log(`Downloaded audio to: ${actualPath}`);
        resolve(actualPath);
      });
    });
  }

  /**
   * Best-effort removal of a temporary file. Never throws; a failure is
   * only logged as a warning.
   */
  async cleanupFile(filePath: string): Promise<void> {
    try {
      if (fs.existsSync(filePath)) {
        fs.unlinkSync(filePath);
        this.logger.log(`Cleaned up: ${filePath}`);
      }
    } catch (e) {
      const reason = e instanceof Error ? e.message : String(e);
      this.logger.warn(`Failed to cleanup file: ${filePath} (${reason})`);
    }
  }

  /**
   * Checks whether `url` starts with a `youtube.com/` or `youtu.be/`
   * address, with optional scheme and `www.` prefix.
   */
  isValidYoutubeUrl(url: string): boolean {
    // One pattern suffices: the former watch?v= and youtu.be patterns only
    // matched strings this one already accepts.
    return /^(https?:\/\/)?(www\.)?(youtube\.com|youtu\.be)\//.test(url);
  }
}
["src/*"] + } + }, + "include": ["src/**/*"], + "exclude": ["node_modules", "dist"] +} diff --git a/apps/transcriber/apps/landing/astro.config.mjs b/apps/transcriber/apps/landing/astro.config.mjs new file mode 100644 index 000000000..c9eefa6a7 --- /dev/null +++ b/apps/transcriber/apps/landing/astro.config.mjs @@ -0,0 +1,11 @@ +import { defineConfig } from 'astro/config'; +import solidJs from '@astrojs/solid-js'; +import tailwind from '@astrojs/tailwind'; + +// https://astro.build/config +export default defineConfig({ + integrations: [ + solidJs(), + tailwind() + ] +}); \ No newline at end of file diff --git a/apps/transcriber/apps/landing/package.json b/apps/transcriber/apps/landing/package.json new file mode 100644 index 000000000..ccb6938fd --- /dev/null +++ b/apps/transcriber/apps/landing/package.json @@ -0,0 +1,27 @@ +{ + "name": "@transcriber/landing", + "type": "module", + "version": "1.0.0", + "private": true, + "scripts": { + "dev": "astro dev", + "build": "astro build", + "preview": "astro preview", + "lint": "eslint .", + "type-check": "astro check" + }, + "dependencies": { + "@astrojs/solid-js": "^4.4.0", + "astro": "^4.16.0", + "solid-js": "^1.9.0" + }, + "devDependencies": { + "@astrojs/check": "^0.9.0", + "@astrojs/tailwind": "^5.1.0", + "@types/node": "^22.10.1", + "autoprefixer": "^10.4.20", + "postcss": "^8.4.49", + "tailwindcss": "^3.4.15", + "typescript": "^5.7.2" + } +} diff --git a/apps/transcriber/apps/landing/src/components/CollapsibleSection.astro b/apps/transcriber/apps/landing/src/components/CollapsibleSection.astro new file mode 100644 index 000000000..549f3810a --- /dev/null +++ b/apps/transcriber/apps/landing/src/components/CollapsibleSection.astro @@ -0,0 +1,162 @@ +--- +export interface Props { + title: string; + icon?: string; + defaultCollapsed?: boolean; + className?: string; +} + +const { title, icon = '📌', defaultCollapsed = false, className = '' } = Astro.props; +const sectionId = title.toLowerCase().replace(/\s+/g, 
'-').replace(/[^a-z0-9-]/g, ''); +--- + +
+ +
+
+ +
+
+
+ + + + \ No newline at end of file diff --git a/apps/transcriber/apps/landing/src/components/ContentCard.tsx b/apps/transcriber/apps/landing/src/components/ContentCard.tsx new file mode 100644 index 000000000..0b754e95c --- /dev/null +++ b/apps/transcriber/apps/landing/src/components/ContentCard.tsx @@ -0,0 +1,103 @@ +import { Component } from 'solid-js'; + +interface ContentCardProps { + title: string; + speaker: string; + speakerId?: string; + duration: string; + excerpt: string; + tags: string[]; + link: string; + date?: string; + thumbnail?: string; + views?: string; +} + +const ContentCard: Component = (props) => { + return ( + + {/* Card Container with hover effects */} +
+ + {/* Gradient overlay on hover */} +
+ + + {/* Content section */} +
+ {/* Title */} +

+ {props.title} +

+ + {/* Meta information */} +
+ {props.speakerId ? ( + { + e.stopPropagation(); + }} + > + 🎤 + {props.speaker} + + ) : ( + + 🎤 + {props.speaker} + + )} + + ⏱️ {props.duration} + {props.date && ( + <> + + {props.date} + + )} + {props.views && ( + <> + + 👁️ {props.views} + + )} +
+ + {/* Excerpt */} +

+ {props.excerpt} +

+ + {/* Tags */} +
+ {props.tags.map(tag => ( + + {tag} + + ))} +
+ + {/* CTA Text (no longer a link since whole card is clickable) */} +
+ Weiterlesen + + + +
+
+ + {/* Decorative corner accent */} +
+
+ + ); +}; + +export default ContentCard; \ No newline at end of file diff --git a/apps/transcriber/apps/landing/src/components/ContentCardList.tsx b/apps/transcriber/apps/landing/src/components/ContentCardList.tsx new file mode 100644 index 000000000..4436178bd --- /dev/null +++ b/apps/transcriber/apps/landing/src/components/ContentCardList.tsx @@ -0,0 +1,151 @@ +import { Component, For, createSignal, onMount } from 'solid-js'; +import ContentCard from './ContentCard'; + +interface Talk { + id: string; + title: string; + speaker: string; + speakerId?: string; + duration: string; + excerpt: string; + tags: string[]; + link: string; + date?: string; + thumbnail?: string; + views?: string; +} + +const ContentCardList: Component = () => { + const [talks, setTalks] = createSignal([]); + const [loading, setLoading] = createSignal(true); + + // Mock data - später durch API-Call ersetzen + onMount(() => { + // Simuliere API-Call + setTimeout(() => { + setTalks([ + { + id: '1', + title: 'Perspective is Everything: The Psychology of Reframing', + speaker: 'Rory Sutherland', + speakerId: 'rory-sutherland', + duration: '18 Min', + excerpt: 'Wie kleine Änderungen in der Perspektive große Auswirkungen auf unser Verhalten und unsere Entscheidungen haben können. Ein faszinierender Einblick in die Verhaltensökonomie.', + tags: ['Behavioral Economics', 'Psychology', 'Marketing'], + link: '/talks/rory-sutherland-perspective-is-everything', + date: '15. März 2024', + views: '12.5k' + }, + { + id: '2', + title: 'The Power of Psychological Solutions', + speaker: 'Rory Sutherland', + speakerId: 'rory-sutherland', + duration: '22 Min', + excerpt: 'Warum psychologische Lösungen oft effektiver und günstiger sind als technische. Sutherland zeigt, wie wir Probleme neu denken können.', + tags: ['Innovation', 'Problem Solving', 'Design Thinking'], + link: '/talks/rory-sutherland-psychological-solutions', + date: '10. 
März 2024', + views: '8.3k' + }, + { + id: '3', + title: 'Marketing Secrets from Behavioral Science', + speaker: 'Rory Sutherland', + speakerId: 'rory-sutherland', + duration: '25 Min', + excerpt: 'Die verborgenen psychologischen Mechanismen hinter erfolgreichem Marketing. Erkenntnisse aus jahrzehntelanger Erfahrung bei Ogilvy.', + tags: ['Marketing', 'Consumer Behavior', 'Branding'], + link: '/talks/rory-sutherland-marketing-secrets', + date: '5. März 2024', + views: '15.7k' + }, + { + id: '4', + title: 'Why Context Matters More Than Content', + speaker: 'Rory Sutherland', + speakerId: 'rory-sutherland', + duration: '20 Min', + excerpt: 'Der Kontext bestimmt, wie wir Informationen wahrnehmen und interpretieren. Eine Lektion in der Kunst der Kommunikation.', + tags: ['Communication', 'Perception', 'Context'], + link: '/talks/rory-sutherland-context-matters', + date: '1. März 2024', + views: '6.2k' + }, + { + id: '5', + title: 'The Irrational Consumer: Understanding Human Behavior', + speaker: 'Rory Sutherland', + speakerId: 'rory-sutherland', + duration: '30 Min', + excerpt: 'Menschen sind keine rationalen Akteure. Wie wir diese Erkenntnis nutzen können, um bessere Produkte und Services zu entwickeln.', + tags: ['Consumer Psychology', 'Behavioral Economics', 'UX Design'], + link: '/talks/rory-sutherland-irrational-consumer', + date: '25. Februar 2024', + views: '10.1k' + }, + { + id: '6', + title: 'Alchemy: The Magic of Ideas', + speaker: 'Rory Sutherland', + speakerId: 'rory-sutherland', + duration: '28 Min', + excerpt: 'Große Ideen kommen oft aus unerwarteten Ecken. Sutherland erklärt, warum Logik allein nicht ausreicht, um Innovation zu schaffen.', + tags: ['Creativity', 'Innovation', 'Ideas'], + link: '/talks/rory-sutherland-alchemy', + date: '20. Februar 2024', + views: '18.9k' + } + ]); + setLoading(false); + }, 500); + }); + + return ( +
+ {loading() ? ( + // Loading skeleton +
+ + {() => ( +
+
+
+
+
+
+
+
+
+
+
+
+ )} +
+
+ ) : ( + // Content cards grid +
+ + {(talk) => ( + + )} + +
+ )} +
+ ); +}; + +export default ContentCardList; \ No newline at end of file diff --git a/apps/transcriber/apps/landing/src/components/Footer.astro b/apps/transcriber/apps/landing/src/components/Footer.astro new file mode 100644 index 000000000..96f15e3b0 --- /dev/null +++ b/apps/transcriber/apps/landing/src/components/Footer.astro @@ -0,0 +1,105 @@ +--- +const currentYear = new Date().getFullYear(); +--- + + \ No newline at end of file diff --git a/apps/transcriber/apps/landing/src/components/Navigation.astro b/apps/transcriber/apps/landing/src/components/Navigation.astro new file mode 100644 index 000000000..cc9a22901 --- /dev/null +++ b/apps/transcriber/apps/landing/src/components/Navigation.astro @@ -0,0 +1,108 @@ +--- +export interface Props { + currentPath?: string; +} + +const { currentPath = "/" } = Astro.props; +--- + + + + \ No newline at end of file diff --git a/apps/transcriber/apps/landing/src/components/SearchableContentList.tsx b/apps/transcriber/apps/landing/src/components/SearchableContentList.tsx new file mode 100644 index 000000000..50241ec9e --- /dev/null +++ b/apps/transcriber/apps/landing/src/components/SearchableContentList.tsx @@ -0,0 +1,322 @@ +import { Component, For, createSignal, onMount, createMemo } from 'solid-js'; +import ContentCard from './ContentCard'; + +interface Talk { + id: string; + title: string; + speaker: string; + duration: string; + excerpt: string; + tags: string[]; + link: string; + date?: string; + thumbnail?: string; + views?: string; +} + +const SearchableContentList: Component = () => { + const [talks, setTalks] = createSignal([]); + const [loading, setLoading] = createSignal(true); + const [searchQuery, setSearchQuery] = createSignal(''); + + // Mock data - später durch API-Call ersetzen + onMount(() => { + // Simuliere API-Call + setTimeout(() => { + setTalks([ + { + id: '1', + title: 'Perspective is Everything: The Psychology of Reframing', + speaker: 'Rory Sutherland', + duration: '18 Min', + excerpt: 'Wie kleine 
Änderungen in der Perspektive große Auswirkungen auf unser Verhalten und unsere Entscheidungen haben können. Ein faszinierender Einblick in die Verhaltensökonomie.', + tags: ['Behavioral Economics', 'Psychology', 'Marketing'], + link: '/talks/rory-sutherland-perspective-is-everything', + date: '15. März 2024', + views: '12.5k' + }, + { + id: '2', + title: 'The Power of Psychological Solutions', + speaker: 'Rory Sutherland', + duration: '22 Min', + excerpt: 'Warum psychologische Lösungen oft effektiver und günstiger sind als technische. Sutherland zeigt, wie wir Probleme neu denken können.', + tags: ['Innovation', 'Problem Solving', 'Design Thinking'], + link: '/talks/rory-sutherland-psychological-solutions', + date: '10. März 2024', + views: '8.3k' + }, + { + id: '3', + title: 'Marketing Secrets from Behavioral Science', + speaker: 'Rory Sutherland', + duration: '25 Min', + excerpt: 'Die verborgenen psychologischen Mechanismen hinter erfolgreichem Marketing. Erkenntnisse aus jahrzehntelanger Erfahrung bei Ogilvy.', + tags: ['Marketing', 'Consumer Behavior', 'Branding'], + link: '/talks/rory-sutherland-marketing-secrets', + date: '5. März 2024', + views: '15.7k' + }, + { + id: '4', + title: 'Why Context Matters More Than Content', + speaker: 'Rory Sutherland', + duration: '20 Min', + excerpt: 'Der Kontext bestimmt, wie wir Informationen wahrnehmen und interpretieren. Eine Lektion in der Kunst der Kommunikation.', + tags: ['Communication', 'Perception', 'Context'], + link: '/talks/rory-sutherland-context-matters', + date: '1. März 2024', + views: '6.2k' + }, + { + id: '5', + title: 'The Irrational Consumer: Understanding Human Behavior', + speaker: 'Rory Sutherland', + duration: '30 Min', + excerpt: 'Menschen sind keine rationalen Akteure. Wie wir diese Erkenntnis nutzen können, um bessere Produkte und Services zu entwickeln.', + tags: ['Consumer Psychology', 'Behavioral Economics', 'UX Design'], + link: '/talks/rory-sutherland-irrational-consumer', + date: '25. 
Februar 2024', + views: '10.1k' + }, + { + id: '6', + title: 'Alchemy: The Magic of Ideas', + speaker: 'Rory Sutherland', + duration: '28 Min', + excerpt: 'Große Ideen kommen oft aus unerwarteten Ecken. Sutherland erklärt, warum Logik allein nicht ausreicht, um Innovation zu schaffen.', + tags: ['Creativity', 'Innovation', 'Ideas'], + link: '/talks/rory-sutherland-alchemy', + date: '20. Februar 2024', + views: '18.9k' + }, + { + id: '7', + title: 'How Great Leaders Inspire Action (Start with Why)', + speaker: 'Simon Sinek', + duration: '18 Min', + excerpt: 'Simon Sineks berühmter TED Talk über das Golden Circle Modell - warum großartige Führungskräfte mit dem "Warum" beginnen und wie dies das Verhalten und die Loyalität von Menschen beeinflusst.', + tags: ['Leadership', 'Purpose', 'Golden Circle', 'Inspiration'], + link: '/speakers/simon-sinek', + date: '9. September 2024', + views: '60M+' + }, + { + id: '8', + title: 'Why Good Leaders Make You Feel Safe', + speaker: 'Simon Sinek', + duration: '12 Min', + excerpt: 'Ein kraftvoller Vortrag darüber, wie echte Führung bedeutet, Sicherheit für das Team zu schaffen, damit Menschen ihr Bestes geben können und bereit sind, füreinander einzustehen.', + tags: ['Leadership', 'Trust', 'Safety', 'Team Building'], + link: '/speakers/simon-sinek', + date: '9. September 2024', + views: '18M+' + }, + { + id: '9', + title: 'Millennials in the Workplace', + speaker: 'Simon Sinek', + duration: '15 Min', + excerpt: 'Simon Sineks virales Interview über die Herausforderungen der Millennial-Generation im Arbeitsplatz - von der Auswirkung der Technologie bis hin zu veränderten Arbeitserwartungen.', + tags: ['Millennials', 'Workplace', 'Technology', 'Generational Change'], + link: '/speakers/simon-sinek', + date: '9. 
September 2024', + views: '100M+' + }, + { + id: '10', + title: 'Love Your Work', + speaker: 'Simon Sinek', + duration: '42 Min', + excerpt: 'Ein inspirierender Talk über die Bedeutung von Leidenschaft bei der Arbeit und wie man eine Karriere aufbaut, die nicht nur erfolgreich, sondern auch erfüllend ist.', + tags: ['Career', 'Passion', 'Purpose', 'Work-Life Balance'], + link: '/speakers/simon-sinek', + date: '9. September 2024', + views: '2.8M' + }, + { + id: '11', + title: 'The Future of AI and Machine Learning', + speaker: 'Andrew Ng', + duration: '35 Min', + excerpt: 'Ein tiefer Einblick in die Zukunft der künstlichen Intelligenz und wie Machine Learning unsere Welt verändern wird.', + tags: ['AI', 'Machine Learning', 'Technology'], + link: '/talks/andrew-ng-future-of-ai', + date: '18. Februar 2024', + views: '22.3k' + }, + { + id: '12', + title: 'Building Resilient Systems', + speaker: 'Martin Fowler', + duration: '40 Min', + excerpt: 'Wie man Software-Systeme baut, die robust, wartbar und skalierbar sind. Best Practices aus jahrzehntelanger Erfahrung.', + tags: ['Software Architecture', 'Engineering', 'Best Practices'], + link: '/talks/martin-fowler-resilient-systems', + date: '15. Februar 2024', + views: '9.8k' + }, + { + id: '13', + title: 'The Psychology of Money', + speaker: 'Morgan Housel', + duration: '32 Min', + excerpt: 'Warum kluge Menschen dumme Dinge mit Geld machen und wie unsere Psychologie unsere finanziellen Entscheidungen beeinflusst.', + tags: ['Finance', 'Psychology', 'Behavioral Economics'], + link: '/talks/morgan-housel-psychology-of-money', + date: '10. 
Februar 2024',
+          views: '25.6k'
+        }
+      ]);
+      setLoading(false);
+    }, 500);
+  });
+
+  /**
+   * Talks that match the current search query.
+   *
+   * The query is trimmed and lower-cased before matching, so surrounding
+   * whitespace (or a query consisting only of whitespace) no longer turns
+   * into a literal substring search that matches nothing. An empty query
+   * returns every talk. A talk matches when the query is a case-insensitive
+   * substring of its title, speaker, excerpt, or any of its tags.
+   */
+  const filteredTalks = createMemo(() => {
+    const query = searchQuery().trim().toLowerCase();
+    if (!query) return talks();
+
+    return talks().filter(talk =>
+      [talk.title, talk.speaker, talk.excerpt, ...talk.tags].some(field =>
+        field.toLowerCase().includes(query)
+      )
+    );
+  });
+
+  /** Copies the value of the element that fired the event into `searchQuery`. */
+  const handleSearch = (e: Event) => {
+    const target = e.target as HTMLInputElement;
+    setSearchQuery(target.value);
+  };
+
+  return (
+
+ {/* Search Bar */} +
+
+ + + + + + {/* Clear button */} + {searchQuery() && ( + + )} +
+ + {/* Search results count */} + {searchQuery() && !loading() && ( +
+ {filteredTalks().length === 0 ? ( + Keine Ergebnisse für "{searchQuery()}" + ) : ( + + {filteredTalks().length} {filteredTalks().length === 1 ? 'Ergebnis' : 'Ergebnisse'} für "{searchQuery()}" + + )} +
+ )} +
+ + {loading() ? ( + // Loading skeleton +
+ + {() => ( +
+
+
+
+
+
+
+
+
+
+
+
+ )} +
+
+ ) : ( + <> + {filteredTalks().length === 0 && searchQuery() ? ( + // No results state +
+
🔍
+

Keine Treffer

+

+ Versuche es mit anderen Suchbegriffen oder browse durch alle verfügbaren Vorträge. +

+ +
+ ) : ( + // Content cards grid with fade-in animation +
+ + {(talk, index) => ( +
+ +
+ )} +
+
+ )} + + )} + + +
+ ); +}; + +export default SearchableContentList; \ No newline at end of file diff --git a/apps/transcriber/apps/landing/src/components/TalksSidebar.astro b/apps/transcriber/apps/landing/src/components/TalksSidebar.astro new file mode 100644 index 000000000..d3dd4b5eb --- /dev/null +++ b/apps/transcriber/apps/landing/src/components/TalksSidebar.astro @@ -0,0 +1,309 @@ +--- +import { getCollection } from 'astro:content'; + +const talks = await getCollection('talks'); +const currentPath = Astro.url.pathname; + +// Sort talks by date (newest first) +const sortedTalks = talks.sort((a, b) => { + return new Date(b.data.date).getTime() - new Date(a.data.date).getTime(); +}); + +// Group talks by speaker +const talksBySpeaker = sortedTalks.reduce((acc, talk) => { + const speaker = talk.data.speaker; + if (!acc[speaker]) { + acc[speaker] = []; + } + acc[speaker].push(talk); + return acc; +}, {} as Record); +--- + + + + \ No newline at end of file diff --git a/apps/transcriber/apps/landing/src/components/ThemeSwitcher.astro b/apps/transcriber/apps/landing/src/components/ThemeSwitcher.astro new file mode 100644 index 000000000..953d7ff7c --- /dev/null +++ b/apps/transcriber/apps/landing/src/components/ThemeSwitcher.astro @@ -0,0 +1,257 @@ +--- +--- + +
+ +
+ + + +
+ + + +
+ + \ No newline at end of file diff --git a/apps/transcriber/apps/landing/src/components/admin/Dashboard.tsx b/apps/transcriber/apps/landing/src/components/admin/Dashboard.tsx new file mode 100644 index 000000000..275c7ae2b --- /dev/null +++ b/apps/transcriber/apps/landing/src/components/admin/Dashboard.tsx @@ -0,0 +1,225 @@ +import { createSignal, createEffect, onMount, For } from 'solid-js'; + +interface Job { + id: string; + url: string; + status: string; + progress: number; + created_at: string; + video_info: any; +} + +interface Stats { + total_transcripts: number; + total_size_mb: number; + active_jobs: number; + completed_jobs: number; + failed_jobs: number; +} + +const API_URL = 'http://localhost:8000'; + +export default function Dashboard() { + const [jobs, setJobs] = createSignal([]); + const [stats, setStats] = createSignal(null); + const [newUrl, setNewUrl] = createSignal(''); + const [selectedModel, setSelectedModel] = createSignal('base'); + const [isLoading, setIsLoading] = createSignal(false); + const [ws, setWs] = createSignal(null); + + onMount(() => { + fetchJobs(); + fetchStats(); + connectWebSocket(); + }); + + const connectWebSocket = () => { + const websocket = new WebSocket(`ws://localhost:8000/ws/progress`); + + websocket.onopen = () => { + console.log('WebSocket connected'); + }; + + websocket.onmessage = (event) => { + const data = JSON.parse(event.data); + if (data.type === 'job_update' || data.type === 'job_complete') { + fetchJobs(); + fetchStats(); + } + }; + + websocket.onerror = (error) => { + console.error('WebSocket error:', error); + }; + + setWs(websocket); + }; + + const fetchJobs = async () => { + try { + const response = await fetch(`${API_URL}/api/jobs`); + const data = await response.json(); + setJobs(data); + } catch (error) { + console.error('Error fetching jobs:', error); + } + }; + + const fetchStats = async () => { + try { + const response = await fetch(`${API_URL}/api/stats`); + const data = await response.json(); + 
setStats(data);
+    } catch (error) {
+      console.error('Error fetching stats:', error);
+    }
+  };
+
+  /**
+   * Posts the URL currently held in `newUrl` to `/api/transcribe`, together
+   * with the selected model and the fixed language `'de'`.
+   *
+   * Does nothing when the URL signal is empty. On an OK response the URL
+   * field is cleared and jobs and stats are re-fetched. A non-OK response
+   * is logged instead of being dropped silently. The loading flag is reset
+   * in `finally`, so it is cleared on every exit path.
+   */
+  const startTranscription = async () => {
+    if (!newUrl()) return;
+
+    setIsLoading(true);
+    try {
+      const response = await fetch(`${API_URL}/api/transcribe`, {
+        method: 'POST',
+        headers: {
+          'Content-Type': 'application/json',
+        },
+        body: JSON.stringify({
+          url: newUrl(),
+          model: selectedModel(),
+          language: 'de'
+        }),
+      });
+
+      if (response.ok) {
+        setNewUrl('');
+        fetchJobs();
+        fetchStats();
+      } else {
+        // Keep the entered URL so the user can retry; just record the failure.
+        console.error('Error starting transcription: HTTP', response.status);
+      }
+    } catch (error) {
+      console.error('Error starting transcription:', error);
+    } finally {
+      setIsLoading(false);
+    }
+  };
+
+  /** Maps a job status string to a text-colour class; unknown statuses get gray. */
+  const getStatusColor = (status: string) => {
+    switch (status) {
+      case 'pending': return 'text-yellow-400';
+      case 'downloading': return 'text-blue-400';
+      case 'transcribing': return 'text-purple-400';
+      case 'completed': return 'text-green-400';
+      case 'failed': return 'text-red-400';
+      default: return 'text-gray-400';
+    }
+  };
+
+  /** Maps a job status string to an emoji; unknown statuses get a question mark. */
+  const getStatusIcon = (status: string) => {
+    switch (status) {
+      case 'pending': return '⏳';
+      case 'downloading': return '⬇️';
+      case 'transcribing': return '🎙️';
+      case 'completed': return '✅';
+      case 'failed': return '❌';
+      default: return '❓';
+    }
+  };
+
+  return (
+
+ {/* Stats Cards */} +
+
+
{stats()?.total_transcripts || 0}
+
Transkripte
+
+
+
{stats()?.total_size_mb || 0} MB
+
Speicher
+
+
+
{stats()?.active_jobs || 0}
+
Aktiv
+
+
+
{stats()?.completed_jobs || 0}
+
Fertig
+
+
+
{stats()?.failed_jobs || 0}
+
Fehler
+
+
+ + {/* New Transcription Form */} +
+

Neue Transkription

+
+ setNewUrl(e.currentTarget.value)} + placeholder="YouTube URL eingeben..." + class="flex-1 px-4 py-2 bg-gray-700 text-white rounded-md focus:outline-none focus:ring-2 focus:ring-blue-500" + /> + + +
+
+ + {/* Active Jobs */} +
+

Aktive Jobs

+
+ + {(job) => ( +
+
+
+ {getStatusIcon(job.status)} + + {job.status.toUpperCase()} + +
+
+ {new Date(job.created_at).toLocaleString('de-DE')} +
+
+
{job.url}
+ {job.status !== 'completed' && job.status !== 'failed' && ( +
+
+
+ )} +
+ )} + + {jobs().length === 0 && ( +
+ Keine aktiven Jobs +
+ )} +
+
+
+ ); +} \ No newline at end of file diff --git a/apps/transcriber/apps/landing/src/components/admin/PlaylistManager.tsx b/apps/transcriber/apps/landing/src/components/admin/PlaylistManager.tsx new file mode 100644 index 000000000..3e3d48a50 --- /dev/null +++ b/apps/transcriber/apps/landing/src/components/admin/PlaylistManager.tsx @@ -0,0 +1,256 @@ +import { createSignal, createEffect, onMount, For, Show } from 'solid-js'; + +interface Playlist { + category: string; + name: string; + path: string; + url_count: number; + urls: string[]; +} + +const API_URL = 'http://localhost:8000'; + +export default function PlaylistManager() { + const [playlists, setPlaylists] = createSignal([]); + const [selectedPlaylist, setSelectedPlaylist] = createSignal(null); + const [newPlaylistName, setNewPlaylistName] = createSignal(''); + const [newPlaylistCategory, setNewPlaylistCategory] = createSignal('general'); + const [newUrls, setNewUrls] = createSignal(''); + const [isCreating, setIsCreating] = createSignal(false); + const [isProcessing, setIsProcessing] = createSignal(false); + + onMount(() => { + fetchPlaylists(); + }); + + const fetchPlaylists = async () => { + try { + const response = await fetch(`${API_URL}/api/playlists`); + const data = await response.json(); + setPlaylists(data); + } catch (error) { + console.error('Error fetching playlists:', error); + } + }; + + const createPlaylist = async () => { + if (!newPlaylistName() || !newUrls()) return; + + try { + const urls = newUrls().split('\n').filter(url => url.trim()); + const name = newPlaylistCategory() === 'general' + ? 
newPlaylistName()
+        : `${newPlaylistCategory()}/${newPlaylistName()}`;
+
+      const response = await fetch(`${API_URL}/api/playlists`, {
+        method: 'POST',
+        headers: {
+          'Content-Type': 'application/json',
+        },
+        body: JSON.stringify({
+          name: name,
+          urls: urls
+        }),
+      });
+
+      if (response.ok) {
+        setNewPlaylistName('');
+        setNewUrls('');
+        setIsCreating(false);
+        fetchPlaylists();
+      }
+    } catch (error) {
+      console.error('Error creating playlist:', error);
+    }
+  };
+
+  /**
+   * Submits every URL of `playlist` to `/api/transcribe`, one request at a
+   * time, with model `'large'` and language `'de'`.
+   *
+   * Each response is checked: rejected requests are logged and excluded
+   * from the count shown in the final alert, so the alert reports how many
+   * requests the backend actually accepted rather than the playlist's
+   * `url_count`. If a request throws, the loop stops, the error is logged
+   * and no alert is shown. The processing flag is reset in `finally`.
+   */
+  const processPlaylist = async (playlist: Playlist) => {
+    setIsProcessing(true);
+    try {
+      let started = 0;
+      // Process each URL in the playlist
+      for (const url of playlist.urls) {
+        const response = await fetch(`${API_URL}/api/transcribe`, {
+          method: 'POST',
+          headers: {
+            'Content-Type': 'application/json',
+          },
+          body: JSON.stringify({
+            url: url,
+            model: 'large',
+            language: 'de'
+          }),
+        });
+        if (response.ok) {
+          started += 1;
+        } else {
+          console.error('Error processing playlist: HTTP', response.status, 'for', url);
+        }
+      }
+      alert(`Started processing ${started} of ${playlist.urls.length} videos from ${playlist.name}`);
+    } catch (error) {
+      console.error('Error processing playlist:', error);
+    } finally {
+      setIsProcessing(false);
+    }
+  };
+
+  /** Background-colour class for a playlist category; unlisted categories get gray. */
+  const getCategoryColor = (category: string) => {
+    const colors: { [key: string]: string } = {
+      'tech': 'bg-blue-900',
+      'people': 'bg-purple-900',
+      'musik': 'bg-pink-900',
+      'gaming': 'bg-green-900',
+      'general': 'bg-gray-800'
+    };
+    return colors[category] || 'bg-gray-800';
+  };
+
+  /** Emoji for a playlist category; unlisted categories get the folder icon. */
+  const getCategoryIcon = (category: string) => {
+    const icons: { [key: string]: string } = {
+      'tech': '💻',
+      'people': '👥',
+      'musik': '🎵',
+      'gaming': '🎮',
+      'general': '📁'
+    };
+    return icons[category] || '📁';
+  };
+
+  return (
+
+ {/* Header with Create Button */} +
+

Playlists

+ +
+ + {/* Create New Playlist Form */} + +
+

Neue Playlist erstellen

+
+
+ + setNewPlaylistName(e.currentTarget.value)} + placeholder="Playlist Name..." + class="flex-1 px-4 py-2 bg-gray-700 text-white rounded-md focus:outline-none focus:ring-2 focus:ring-blue-500" + /> +
+