From 06107f6a52636d88798d401167be77a73fa728c4 Mon Sep 17 00:00:00 2001 From: Till JS Date: Thu, 2 Apr 2026 01:17:47 +0200 Subject: [PATCH] feat(mana-video-gen): add AI video generation service with LTX-Video New GPU service for fast text-to-video generation using LTX-Video (~2B params) on the RTX 3090. Generates 480p clips in 10-30 seconds, uses ~10GB VRAM. Includes Cloudflare Tunnel route, Prometheus monitoring, and health checks. Co-Authored-By: Claude Opus 4.6 (1M context) --- CLAUDE.md | 2 + cloudflared-config.yml | 2 + docker/prometheus/prometheus.yml | 10 + docs/WINDOWS_GPU_SERVER_SETUP.md | 10 + scripts/mac-mini/health-check.sh | 1 + scripts/mac-mini/status.sh | 1 + services/mana-video-gen/.env.example | 28 ++ services/mana-video-gen/CLAUDE.md | 172 +++++++++ services/mana-video-gen/app/__init__.py | 1 + services/mana-video-gen/app/ltx_service.py | 277 ++++++++++++++ services/mana-video-gen/app/main.py | 404 +++++++++++++++++++++ services/mana-video-gen/requirements.txt | 20 + services/mana-video-gen/setup.sh | 89 +++++ 13 files changed, 1017 insertions(+) create mode 100644 services/mana-video-gen/.env.example create mode 100644 services/mana-video-gen/CLAUDE.md create mode 100644 services/mana-video-gen/app/__init__.py create mode 100644 services/mana-video-gen/app/ltx_service.py create mode 100644 services/mana-video-gen/app/main.py create mode 100644 services/mana-video-gen/requirements.txt create mode 100755 services/mana-video-gen/setup.sh diff --git a/CLAUDE.md b/CLAUDE.md index d45b8bac8..b1d01f518 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -183,6 +183,7 @@ manacore-monorepo/ │ ├── mana-media/ # Media platform: CAS, thumbnails (Hono + Bun) │ ├── mana-llm/ # LLM abstraction (Python/FastAPI) │ ├── mana-image-gen/ # AI image generation with FLUX (Python/FastAPI) +│ ├── mana-video-gen/ # AI video generation with LTX-Video (Python/FastAPI) │ ├── mana-stt/ # Speech-to-text (Python/FastAPI) │ ├── mana-tts/ # Text-to-speech (Python/FastAPI) │ ├── 
mana-voice-bot/ # Voice assistant (Python/FastAPI) @@ -920,6 +921,7 @@ Host mana-gpu - Mana STT (Speech-to-Text, Port 3020) - Mana TTS (Text-to-Speech, Port 3022) - Mana Image Gen (FLUX image generation, Port 3023) +- Mana Video Gen (LTX-Video generation, Port 3026) For setup documentation, see **[docs/WINDOWS_GPU_SERVER_SETUP.md](docs/WINDOWS_GPU_SERVER_SETUP.md)**. diff --git a/cloudflared-config.yml b/cloudflared-config.yml index b35fe6813..b4bc6ae72 100644 --- a/cloudflared-config.yml +++ b/cloudflared-config.yml @@ -119,6 +119,8 @@ ingress: service: http://192.168.178.11:3022 - hostname: gpu-img.mana.how service: http://192.168.178.11:3023 + - hostname: gpu-video.mana.how + service: http://192.168.178.11:3026 - hostname: gpu-ollama.mana.how service: http://192.168.178.11:11434 diff --git a/docker/prometheus/prometheus.yml b/docker/prometheus/prometheus.yml index 928a37d69..dcc2a2d89 100644 --- a/docker/prometheus/prometheus.yml +++ b/docker/prometheus/prometheus.yml @@ -203,6 +203,15 @@ scrape_configs: metrics_path: '/health' scrape_interval: 30s + # GPU: Video Generation (LTX-Video) + - job_name: 'gpu-video-gen' + static_configs: + - targets: ['192.168.178.11:3026'] + labels: + instance: 'gpu-server' + metrics_path: '/health' + scrape_interval: 30s + # ============================================ # Go Infrastructure Services # ============================================ @@ -339,6 +348,7 @@ scrape_configs: - https://gpu-stt.mana.how - https://gpu-tts.mana.how - https://gpu-img.mana.how + - https://gpu-video.mana.how relabel_configs: - source_labels: [__address__] target_label: __param_target diff --git a/docs/WINDOWS_GPU_SERVER_SETUP.md b/docs/WINDOWS_GPU_SERVER_SETUP.md index 9ed2d86bc..46637d62d 100644 --- a/docs/WINDOWS_GPU_SERVER_SETUP.md +++ b/docs/WINDOWS_GPU_SERVER_SETUP.md @@ -124,6 +124,7 @@ New-NetFirewallRule -DisplayName "Mana-STT" -Direction Inbound -LocalPort 3020 - New-NetFirewallRule -DisplayName "Mana-TTS" -Direction Inbound -LocalPort 3022 
-Protocol TCP -Action Allow New-NetFirewallRule -DisplayName "Mana-Image-Gen" -Direction Inbound -LocalPort 3023 -Protocol TCP -Action Allow New-NetFirewallRule -DisplayName "Mana-LLM" -Direction Inbound -LocalPort 3025 -Protocol TCP -Action Allow +New-NetFirewallRule -DisplayName "Mana-Video-Gen" -Direction Inbound -LocalPort 3026 -Protocol TCP -Action Allow ``` --- @@ -459,6 +460,7 @@ Internet → Cloudflare → Mac Mini (gpu-proxy.py) → GPU Server (LAN) | mana-stt | `https://gpu-stt.mana.how` | | mana-tts | `https://gpu-tts.mana.how` | | mana-image-gen | `https://gpu-img.mana.how` | +| mana-video-gen | `https://gpu-video.mana.how` | | Ollama | `https://gpu-ollama.mana.how` | ```bash @@ -484,6 +486,12 @@ curl -X POST https://gpu-img.mana.how/generate \ -H "Content-Type: application/json" \ -d '{"prompt":"A cat","width":1024,"height":1024}' +# Video Generation (LTX-Video) +curl https://gpu-video.mana.how/health +curl -X POST https://gpu-video.mana.how/generate \ + -H "Content-Type: application/json" \ + -d '{"prompt":"Ocean waves crashing on rocks","width":704,"height":480}' + # Ollama direkt curl https://gpu-ollama.mana.how/api/tags ``` @@ -497,6 +505,7 @@ curl http://192.168.178.11:3025/health # mana-llm curl http://192.168.178.11:3020/health # mana-stt curl http://192.168.178.11:3022/health # mana-tts curl http://192.168.178.11:3023/health # mana-image-gen +curl http://192.168.178.11:3026/health # mana-video-gen curl http://192.168.178.11:11434/api/tags # Ollama ``` @@ -606,6 +615,7 @@ GPU Server (healthcheck.py → log-shipper.py) | GPU STT | `gpu-stt` | 3020 (`/health`) | | GPU TTS | `gpu-tts` | 3022 (`/health`) | | GPU Image Gen | `gpu-image-gen` | 3023 (`/health`) | +| GPU Video Gen | `gpu-video-gen` | 3026 (`/health`) | --- diff --git a/scripts/mac-mini/health-check.sh b/scripts/mac-mini/health-check.sh index 13d14c262..54644b3ca 100755 --- a/scripts/mac-mini/health-check.sh +++ b/scripts/mac-mini/health-check.sh @@ -264,6 +264,7 @@ check_service "GPU Ollama" 
"http://192.168.178.11:11434/api/version" 3 check_service "GPU STT" "http://192.168.178.11:3020/health" 3 check_service "GPU TTS" "http://192.168.178.11:3022/health" 3 check_service "GPU Image Gen" "http://192.168.178.11:3023/health" 3 +check_service "GPU Video Gen" "http://192.168.178.11:3026/health" 3 echo "" echo "Matrix:" diff --git a/scripts/mac-mini/status.sh b/scripts/mac-mini/status.sh index e1f475d8d..7958f4564 100755 --- a/scripts/mac-mini/status.sh +++ b/scripts/mac-mini/status.sh @@ -103,6 +103,7 @@ check_gpu_service "Ollama (LLM)" "http://192.168.178.11:11434/api/version" check_gpu_service "STT (Whisper)" "http://192.168.178.11:3020/health" check_gpu_service "TTS" "http://192.168.178.11:3022/health" check_gpu_service "Image Gen (FLUX)" "http://192.168.178.11:3023/health" +check_gpu_service "Video Gen (LTX)" "http://192.168.178.11:3026/health" # ============================================ # Network/Tunnel Status diff --git a/services/mana-video-gen/.env.example b/services/mana-video-gen/.env.example new file mode 100644 index 000000000..f09f531fe --- /dev/null +++ b/services/mana-video-gen/.env.example @@ -0,0 +1,28 @@ +# Mana Video Generation - Environment Variables + +# Service +PORT=3026 + +# Model +LTX_MODEL_ID=Lightricks/LTX-Video +DEVICE=cuda + +# Defaults +DEFAULT_WIDTH=704 +DEFAULT_HEIGHT=480 +DEFAULT_NUM_FRAMES=81 +DEFAULT_FPS=25 +DEFAULT_STEPS=30 +DEFAULT_GUIDANCE_SCALE=7.5 + +# Limits +MAX_PROMPT_LENGTH=2000 +MAX_FRAMES=161 +MAX_STEPS=50 +GENERATION_TIMEOUT=600 + +# Output +OUTPUT_DIR=/tmp/mana-video-gen + +# CORS +CORS_ORIGINS=https://mana.how,https://picture.mana.how,https://chat.mana.how,http://localhost:5173 diff --git a/services/mana-video-gen/CLAUDE.md b/services/mana-video-gen/CLAUDE.md new file mode 100644 index 000000000..473bd809f --- /dev/null +++ b/services/mana-video-gen/CLAUDE.md @@ -0,0 +1,172 @@ +# CLAUDE.md - Mana Video Generation Service + +## Service Overview + +AI video generation microservice using LTX-Video via 
HuggingFace diffusers: + +- **Port**: 3026 +- **Framework**: Python + FastAPI +- **Model**: LTX-Video (~2B params, Lightricks) +- **Backend**: diffusers + PyTorch CUDA +- **Target Hardware**: NVIDIA RTX 3090 (24 GB VRAM) + +## Features + +- **Fast generation**: 10-30 seconds per clip on RTX 3090 +- **Text-to-video**: 480p-720p, up to ~6 seconds +- **Low VRAM**: ~10 GB — leaves room for other GPU services +- **Lazy model loading**: Model loads on first request, stays in VRAM +- **VRAM management**: POST /unload to free GPU memory for other services +- **MP4 output**: Direct video file serving + +## Commands + +```bash +# Setup (installs PyTorch CUDA + diffusers + LTX-Video) +chmod +x setup.sh && ./setup.sh + +# Development +source .venv/bin/activate +uvicorn app.main:app --host 0.0.0.0 --port 3026 --reload + +# Test +curl http://localhost:3026/health +curl -X POST http://localhost:3026/generate \ + -H "Content-Type: application/json" \ + -d '{"prompt": "A cat walking in a garden"}' | jq + +# Free VRAM (e.g. 
before running image generation) +curl -X POST http://localhost:3026/unload +``` + +## File Structure + +``` +services/mana-video-gen/ +├── app/ +│ ├── __init__.py +│ ├── main.py # FastAPI endpoints +│ └── ltx_service.py # LTX-Video diffusers pipeline +├── setup.sh # Setup script (CUDA + Python deps) +├── requirements.txt +├── .env.example +└── CLAUDE.md +``` + +## API Endpoints + +| Endpoint | Method | Purpose | +|----------|--------|---------| +| `/health` | GET | Health check + GPU info | +| `/models` | GET | Model info | +| `/generate` | POST | Generate video from text prompt | +| `/videos/{filename}` | GET | Serve generated video | +| `/videos/{filename}` | DELETE | Delete video | +| `/unload` | POST | Unload model, free VRAM | +| `/cleanup` | POST | Clean old videos | + +## Generate Request + +```json +{ + "prompt": "A timelapse of a flower blooming", + "negative_prompt": "blurry, low quality", + "width": 704, + "height": 480, + "num_frames": 81, + "fps": 25, + "steps": 30, + "guidance_scale": 7.5, + "seed": null +} +``` + +## Generate Response + +```json +{ + "success": true, + "video_url": "/videos/abc123.mp4", + "prompt": "A timelapse of a flower blooming", + "width": 704, + "height": 480, + "num_frames": 81, + "fps": 25, + "duration": 3.24, + "steps": 30, + "seed": 42, + "generation_time": 18.5 +} +``` + +## Environment Variables + +| Variable | Default | Description | +|----------|---------|-------------| +| `PORT` | `3026` | Service port | +| `LTX_MODEL_ID` | `Lightricks/LTX-Video` | HuggingFace model ID | +| `DEVICE` | `cuda` | PyTorch device | +| `DEFAULT_WIDTH` | `704` | Default video width | +| `DEFAULT_HEIGHT` | `480` | Default video height | +| `DEFAULT_NUM_FRAMES` | `81` | Default frame count (~3.2s) | +| `DEFAULT_FPS` | `25` | Default framerate | +| `DEFAULT_STEPS` | `30` | Default inference steps | +| `DEFAULT_GUIDANCE_SCALE` | `7.5` | Default CFG scale | +| `GENERATION_TIMEOUT` | `600` | Timeout in seconds | +| `MAX_PROMPT_LENGTH` | `2000` | 
Max prompt chars | +| `MAX_FRAMES` | `161` | Max frames (~6.4s) | +| `CORS_ORIGINS` | (production URLs) | CORS config | + +## Model Details + +### LTX-Video + +- **Parameters**: ~2 billion +- **License**: Lightricks Open License (commercial use allowed) +- **Download size**: ~4 GB (auto-downloaded on first use) +- **VRAM usage**: ~10 GB +- **Optimal settings**: 704x480, 30 steps, 7.5 guidance +- **Speed on RTX 3090**: 10-30 seconds per clip + +## VRAM Management + +The GPU server runs multiple AI services. LTX-Video uses ~10 GB VRAM: + +- Model loads lazily on first `/generate` request +- Use `POST /unload` to free VRAM when not generating videos +- Other services (mana-image-gen, mana-stt, mana-tts) share the same GPU +- `enable_model_cpu_offload()` moves unused layers to CPU automatically + +## Performance (RTX 3090) + +| Resolution | Frames | Steps | Time | +|------------|--------|-------|------| +| 512x320 | 41 | 20 | ~8s | +| 704x480 | 81 | 30 | ~20s | +| 704x480 | 41 | 20 | ~10s | +| 1280x720 | 41 | 30 | ~45s | + +## Integration + +Used by: +- **Picture App** — video generation alongside images +- **Chat App** — inline video generation +- **Matrix Bots** — video generation via chat commands + +### Example (TypeScript) + +```typescript +const response = await fetch('http://192.168.178.11:3026/generate', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + prompt: 'Ocean waves crashing on rocks at sunset', + width: 704, + height: 480, + num_frames: 81, + }), +}); + +const result = await response.json(); +const videoUrl = `http://192.168.178.11:3026${result.video_url}`; +``` diff --git a/services/mana-video-gen/app/__init__.py b/services/mana-video-gen/app/__init__.py new file mode 100644 index 000000000..416ecc097 --- /dev/null +++ b/services/mana-video-gen/app/__init__.py @@ -0,0 +1 @@ +"""Mana Video Generation - AI Video Generation Microservice.""" diff --git a/services/mana-video-gen/app/ltx_service.py 
"""
LTX-Video Service

Uses LTX-Video 0.9.x via HuggingFace diffusers for fast video generation.
Optimized for NVIDIA GPUs (CUDA).

The heavy dependencies (torch, diffusers) are imported lazily inside the
functions that need them, so this module can be imported on a machine
without a GPU stack installed.
"""

import asyncio
import gc
import logging
import os
import time
import uuid
from dataclasses import dataclass
from pathlib import Path
from typing import Optional

logger = logging.getLogger(__name__)

# Configuration (every value can be overridden through the environment)
MODEL_ID = os.getenv("LTX_MODEL_ID", "Lightricks/LTX-Video")
DEFAULT_WIDTH = int(os.getenv("DEFAULT_WIDTH", "704"))
DEFAULT_HEIGHT = int(os.getenv("DEFAULT_HEIGHT", "480"))
DEFAULT_NUM_FRAMES = int(os.getenv("DEFAULT_NUM_FRAMES", "81"))  # ~3.2s at 25fps
DEFAULT_FPS = int(os.getenv("DEFAULT_FPS", "25"))
DEFAULT_STEPS = int(os.getenv("DEFAULT_STEPS", "30"))
DEFAULT_GUIDANCE_SCALE = float(os.getenv("DEFAULT_GUIDANCE_SCALE", "7.5"))
GENERATION_TIMEOUT = int(os.getenv("GENERATION_TIMEOUT", "600"))  # seconds
DEVICE = os.getenv("DEVICE", "cuda")

# Output directory for generated videos (created at import time)
OUTPUT_DIR = Path(os.getenv("OUTPUT_DIR", "/tmp/mana-video-gen"))
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# Pipeline singleton (lazy loaded); the lock serialises load/unload only.
_pipeline = None
_pipeline_lock = asyncio.Lock()


@dataclass
class GenerationResult:
    """Result of video generation."""

    video_path: str  # path of the exported MP4 inside OUTPUT_DIR
    prompt: str
    width: int
    height: int
    num_frames: int
    fps: int
    steps: int
    seed: int  # seed actually used (random when the caller passed None)
    generation_time: float  # seconds spent in the diffusion pipeline call


def is_model_available() -> bool:
    """Return True when PyTorch is importable and reports a CUDA device.

    Only PyTorch is probed here; diffusers is imported later, when the
    pipeline is loaded for the first time.
    """
    try:
        import torch
    except ImportError:
        return False
    return torch.cuda.is_available()


def get_model_info() -> dict:
    """Return static model metadata plus the current GPU/pipeline state.

    Returns:
        A dict with the model id/name, CUDA availability, GPU name, total
        VRAM in GB (0 when no CUDA device is visible), the configured
        defaults and whether the pipeline is currently loaded.
    """
    cuda_available = False
    gpu_name = "N/A"
    vram_gb = 0

    try:
        import torch
    except ImportError:
        pass
    else:
        cuda_available = torch.cuda.is_available()
        if cuda_available:
            gpu_name = torch.cuda.get_device_name(0)
            # The device-properties attribute is `total_memory`; the former
            # `total_mem` raised AttributeError on every CUDA host.
            total_bytes = torch.cuda.get_device_properties(0).total_memory
            vram_gb = round(total_bytes / 1e9, 1)

    return {
        "model_id": MODEL_ID,
        "model_name": "LTX-Video",
        "parameters": "~2B",
        "license": "Lightricks Open License (commercial OK)",
        "cuda_available": cuda_available,
        "gpu": gpu_name,
        "vram_gb": vram_gb,
        "default_resolution": f"{DEFAULT_WIDTH}x{DEFAULT_HEIGHT}",
        "default_frames": DEFAULT_NUM_FRAMES,
        "default_fps": DEFAULT_FPS,
        "default_steps": DEFAULT_STEPS,
        "pipeline_loaded": _pipeline is not None,
    }


async def _load_pipeline():
    """Load the LTX-Video pipeline once and return the shared instance.

    The asyncio lock makes concurrent first requests wait for a single
    load; the blocking `from_pretrained` call runs in the default executor
    so the event loop stays responsive.
    """
    global _pipeline

    async with _pipeline_lock:
        if _pipeline is not None:
            return _pipeline

        logger.info("Loading LTX-Video pipeline from %s...", MODEL_ID)
        load_start = time.time()

        def _load():
            import torch
            from diffusers import LTXPipeline

            pipe = LTXPipeline.from_pretrained(
                MODEL_ID,
                torch_dtype=torch.bfloat16,
            )
            if DEVICE.startswith("cuda"):
                # Let the offload hooks place sub-models on the GPU on
                # demand. Calling pipe.to("cuda") first would copy the
                # whole pipeline into VRAM and defeat the memory saving.
                pipe.enable_model_cpu_offload(device=DEVICE)
            else:
                pipe.to(DEVICE)
            return pipe

        loop = asyncio.get_running_loop()
        _pipeline = await loop.run_in_executor(None, _load)

        logger.info("LTX-Video pipeline loaded in %.1fs", time.time() - load_start)
        return _pipeline


async def unload_pipeline():
    """Drop the pipeline singleton and release cached CUDA memory.

    Does nothing when no pipeline is loaded.
    """
    global _pipeline

    async with _pipeline_lock:
        if _pipeline is None:
            return

        import torch

        _pipeline = None
        # Collect first so the tensors are really gone before the caching
        # allocator is asked to hand its blocks back to the driver.
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        logger.info("LTX-Video pipeline unloaded, VRAM freed")


async def generate_video(
    prompt: str,
    width: int = DEFAULT_WIDTH,
    height: int = DEFAULT_HEIGHT,
    num_frames: int = DEFAULT_NUM_FRAMES,
    fps: int = DEFAULT_FPS,
    steps: int = DEFAULT_STEPS,
    guidance_scale: float = DEFAULT_GUIDANCE_SCALE,
    seed: Optional[int] = None,
    negative_prompt: str = "",
) -> GenerationResult:
    """
    Generate a video from a text prompt using LTX-Video.

    Args:
        prompt: Text prompt for video generation
        width: Video width (default 704)
        height: Video height (default 480)
        num_frames: Number of frames (default 81 = ~3.2s at 25fps)
        fps: Frames per second for output (default 25)
        steps: Number of inference steps (default 30)
        guidance_scale: CFG scale (default 7.5)
        seed: Random seed (None for random)
        negative_prompt: Negative prompt (empty string means none)

    Returns:
        GenerationResult with video path and metadata

    Raises:
        RuntimeError: If CUDA is not available, the generation times out,
            or the exported file is missing. Other pipeline errors are
            logged and re-raised unchanged.
    """
    if not is_model_available():
        raise RuntimeError("CUDA not available - cannot generate video")

    pipe = await _load_pipeline()

    import torch

    video_id = str(uuid.uuid4())[:8]
    output_path = OUTPUT_DIR / f"{video_id}.mp4"

    actual_seed = seed if seed is not None else torch.randint(0, 2**32, (1,)).item()
    generator = torch.Generator(device="cpu").manual_seed(actual_seed)

    logger.info(
        "Generating video: %sx%s, %s frames, %s steps, seed=%s",
        width,
        height,
        num_frames,
        steps,
        actual_seed,
    )

    def _generate():
        return pipe(
            prompt=prompt,
            negative_prompt=negative_prompt or None,
            width=width,
            height=height,
            num_frames=num_frames,
            num_inference_steps=steps,
            guidance_scale=guidance_scale,
            generator=generator,
        )

    def _export(frames):
        from diffusers.utils import export_to_video

        export_to_video(frames, str(output_path), fps=fps)

    # NOTE(review): concurrent requests share one pipeline object and are not
    # serialised here — confirm whether callers guarantee one job at a time.
    loop = asyncio.get_running_loop()
    start_time = time.time()

    try:
        # The timeout only stops waiting; the worker thread cannot be
        # cancelled and keeps the GPU busy until the pipeline call returns.
        result = await asyncio.wait_for(
            loop.run_in_executor(None, _generate),
            timeout=GENERATION_TIMEOUT,
        )
        generation_time = time.time() - start_time

        # Encoding is blocking too, so keep it off the event loop.
        await loop.run_in_executor(None, _export, result.frames[0])

        if not output_path.exists():
            raise RuntimeError("Video generation completed but output file not found")

        file_size_mb = output_path.stat().st_size / (1024 * 1024)
        duration_s = num_frames / fps

        logger.info(
            "Video generated: %s (%sx%s, %.1fs, %.1fMB, took %.1fs)",
            output_path,
            width,
            height,
            duration_s,
            file_size_mb,
            generation_time,
        )

        return GenerationResult(
            video_path=str(output_path),
            prompt=prompt,
            width=width,
            height=height,
            num_frames=num_frames,
            fps=fps,
            steps=steps,
            seed=actual_seed,
            generation_time=generation_time,
        )

    except asyncio.TimeoutError:
        logger.error("Video generation timed out after %ss", GENERATION_TIMEOUT)
        raise RuntimeError(
            f"Generation timed out after {GENERATION_TIMEOUT} seconds"
        ) from None
    except Exception as e:
        logger.error("Video generation error: %s", e)
        raise


def cleanup_video(video_path: str) -> bool:
    """Delete one generated video.

    Only files that sit directly inside OUTPUT_DIR are removed.

    Returns:
        True when the file existed and was deleted, False otherwise
        (missing file, file outside OUTPUT_DIR, or a failed unlink).
    """
    try:
        path = Path(video_path)
        if path.exists() and path.parent == OUTPUT_DIR:
            path.unlink()
            return True
    except Exception as e:
        logger.warning("Failed to cleanup video %s: %s", video_path, e)
    return False


def cleanup_old_videos(max_age_hours: int = 24) -> int:
    """Delete files in OUTPUT_DIR whose mtime is older than max_age_hours.

    A failure on one file is logged and skipped so the remaining files are
    still processed.

    Returns:
        Number of files removed.
    """
    cleaned = 0
    cutoff = time.time() - (max_age_hours * 3600)

    try:
        entries = list(OUTPUT_DIR.iterdir())
    except OSError as e:
        logger.warning("Cleanup error: %s", e)
        return 0

    for file in entries:
        try:
            if file.is_file() and file.stat().st_mtime < cutoff:
                file.unlink()
                cleaned += 1
        except OSError as e:
            logger.warning("Cleanup error: %s", e)

    if cleaned > 0:
        logger.info("Cleaned up %s old videos", cleaned)

    return cleaned
"""
Mana Video Generation - AI Video Generation Microservice

Provides video generation using LTX-Video via HuggingFace diffusers.
Optimized for NVIDIA RTX 3090 (CUDA).

API:
- POST   /generate           - Generate video from text prompt
- GET    /videos/{filename}  - Serve a generated video
- DELETE /videos/{filename}  - Delete a generated video
- GET    /health             - Health check
- GET    /models             - Model information
- POST   /unload             - Free VRAM by unloading model
- POST   /cleanup            - Delete old generated videos
"""

import logging
import os
from contextlib import asynccontextmanager
from pathlib import Path
from typing import Optional

from fastapi import FastAPI, HTTPException, Response, BackgroundTasks
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse, JSONResponse
from pydantic import BaseModel, Field

from .ltx_service import (
    OUTPUT_DIR,
    generate_video,
    unload_pipeline,
    is_model_available,
    get_model_info,
    cleanup_video,
    cleanup_old_videos,
    DEFAULT_WIDTH,
    DEFAULT_HEIGHT,
    DEFAULT_NUM_FRAMES,
    DEFAULT_FPS,
    DEFAULT_STEPS,
    DEFAULT_GUIDANCE_SCALE,
)

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
)
logger = logging.getLogger(__name__)

# Configuration from environment
PORT = int(os.getenv("PORT", "3026"))
MAX_PROMPT_LENGTH = int(os.getenv("MAX_PROMPT_LENGTH", "2000"))
MIN_DIMENSION = int(os.getenv("MIN_DIMENSION", "256"))
MAX_DIMENSION = int(os.getenv("MAX_DIMENSION", "1280"))
MAX_FRAMES = int(os.getenv("MAX_FRAMES", "161"))  # ~6.4s at 25fps
MAX_STEPS = int(os.getenv("MAX_STEPS", "50"))
# Tolerate "a, b" style lists and trailing commas in the env value.
CORS_ORIGINS = [
    origin.strip()
    for origin in os.getenv(
        "CORS_ORIGINS",
        "https://mana.how,https://picture.mana.how,https://chat.mana.how,http://localhost:5173",
    ).split(",")
    if origin.strip()
]

# File extensions that may be served/deleted, mapped to their media type.
_VIDEO_MEDIA_TYPES = {".mp4": "video/mp4", ".webm": "video/webm"}


@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan manager for startup/shutdown.

    Startup logs the GPU state and removes videos older than 24 hours;
    shutdown unloads the pipeline.
    """
    logger.info(f"Starting Mana Video Generation service on port {PORT}")

    if is_model_available():
        info = get_model_info()
        logger.info(f"CUDA available: {info['gpu']} ({info['vram_gb']} GB VRAM)")
    else:
        logger.warning("CUDA not available - service will return errors until GPU is accessible")

    # Cleanup old videos on startup
    cleanup_old_videos(max_age_hours=24)

    yield

    # Unload model on shutdown
    await unload_pipeline()
    logger.info("Shutting down Mana Video Generation service")


# Create FastAPI app
app = FastAPI(
    title="Mana Video Generation",
    description="AI video generation service using LTX-Video",
    version="1.0.0",
    lifespan=lifespan,
)

# CORS middleware
app.add_middleware(
    CORSMiddleware,
    allow_origins=CORS_ORIGINS,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


# ============================================================================
# Request/Response Models
# ============================================================================


class GenerateRequest(BaseModel):
    """Request for video generation.

    The numeric bounds come from the environment-driven limits above, so
    the validation and the published schema always agree with the
    configured values.
    """

    prompt: str = Field(
        ...,
        description="Text prompt for video generation",
        min_length=1,
        max_length=MAX_PROMPT_LENGTH,
    )
    negative_prompt: str = Field(
        "",
        description="Negative prompt (what to avoid)",
        max_length=1000,
    )
    width: int = Field(
        DEFAULT_WIDTH,
        ge=MIN_DIMENSION,
        le=MAX_DIMENSION,
        description="Video width in pixels (must be divisible by 32)",
    )
    height: int = Field(
        DEFAULT_HEIGHT,
        ge=MIN_DIMENSION,
        le=MAX_DIMENSION,
        description="Video height in pixels (must be divisible by 32)",
    )
    num_frames: int = Field(
        DEFAULT_NUM_FRAMES,
        ge=9,
        le=MAX_FRAMES,
        description="Number of frames (81 = ~3.2s at 25fps)",
    )
    fps: int = Field(
        DEFAULT_FPS,
        ge=8,
        le=30,
        description="Frames per second for output video",
    )
    steps: int = Field(
        DEFAULT_STEPS,
        ge=1,
        le=MAX_STEPS,
        description="Number of inference steps",
    )
    guidance_scale: float = Field(
        DEFAULT_GUIDANCE_SCALE,
        ge=1.0,
        le=20.0,
        description="Classifier-free guidance scale",
    )
    seed: Optional[int] = Field(
        None,
        ge=0,
        description="Random seed (None for random)",
    )


class GenerateResponse(BaseModel):
    """Response for video generation."""

    success: bool
    video_url: str
    prompt: str
    width: int
    height: int
    num_frames: int
    fps: int
    duration: float
    steps: int
    seed: int
    generation_time: float


class HealthResponse(BaseModel):
    """Health check response."""

    status: str
    service: str
    cuda_available: bool
    gpu: str


class ModelsResponse(BaseModel):
    """Available models response."""

    ltx_video: dict


class ErrorResponse(BaseModel):
    """Error response."""

    error: str
    detail: str


# ============================================================================
# Helpers
# ============================================================================


def _resolve_video_path(filename: str) -> Path:
    """Validate a client-supplied filename and map it into OUTPUT_DIR.

    Raises:
        HTTPException: 400 for path separators, ".." or an extension that
            is not a served video type; 404 when the file does not exist.
    """
    # Security: only allow specific extensions and no path traversal
    if ".." in filename or "/" in filename or "\\" in filename:
        raise HTTPException(status_code=400, detail="Invalid filename")

    if Path(filename).suffix.lower() not in _VIDEO_MEDIA_TYPES:
        raise HTTPException(status_code=400, detail="Invalid file type")

    video_path = OUTPUT_DIR / filename
    if not video_path.is_file():
        raise HTTPException(status_code=404, detail="Video not found")

    return video_path


# ============================================================================
# Health & Info Endpoints
# ============================================================================


@app.get("/health", response_model=HealthResponse)
async def health_check():
    """Check service health and CUDA availability."""
    info = get_model_info()
    return HealthResponse(
        status="healthy" if info["cuda_available"] else "degraded",
        service="mana-video-gen",
        cuda_available=info["cuda_available"],
        gpu=info["gpu"],
    )


@app.get("/models", response_model=ModelsResponse)
async def get_models():
    """Get information about available models."""
    return ModelsResponse(ltx_video=get_model_info())


# ============================================================================
# Video Generation Endpoints
# ============================================================================


@app.post("/generate", response_model=GenerateResponse)
async def generate(request: GenerateRequest, background_tasks: BackgroundTasks):
    """
    Generate a video from a text prompt using LTX-Video.

    LTX-Video generates 480p video clips in 10-30 seconds on RTX 3090.
    The model is loaded on first request and stays in VRAM until /unload.

    Range limits (prompt length, dimensions, frames, steps) are enforced by
    the request model; this handler adds the checks the model cannot
    express.
    """
    if not request.prompt.strip():
        raise HTTPException(status_code=400, detail="Prompt cannot be empty")

    # Validate dimensions are divisible by 32 (required by VAE)
    if request.width % 32 != 0:
        raise HTTPException(
            status_code=400,
            detail=f"Width must be divisible by 32 (got {request.width})",
        )
    if request.height % 32 != 0:
        raise HTTPException(
            status_code=400,
            detail=f"Height must be divisible by 32 (got {request.height})",
        )

    # Check CUDA availability
    if not is_model_available():
        raise HTTPException(
            status_code=503,
            detail="Video generation service not available. CUDA not detected.",
        )

    try:
        result = await generate_video(
            prompt=request.prompt,
            negative_prompt=request.negative_prompt,
            width=request.width,
            height=request.height,
            num_frames=request.num_frames,
            fps=request.fps,
            steps=request.steps,
            guidance_scale=request.guidance_scale,
            seed=request.seed,
        )
    except RuntimeError as e:
        logger.error(f"Generation error: {e}")
        raise HTTPException(status_code=500, detail=str(e))
    except Exception as e:
        logger.exception("Unexpected error during generation")
        raise HTTPException(status_code=500, detail=f"Video generation failed: {e}")

    video_filename = Path(result.video_path).name

    return GenerateResponse(
        success=True,
        video_url=f"/videos/{video_filename}",
        prompt=result.prompt,
        width=result.width,
        height=result.height,
        num_frames=result.num_frames,
        fps=result.fps,
        duration=round(result.num_frames / result.fps, 2),
        steps=result.steps,
        seed=result.seed,
        generation_time=round(result.generation_time, 2),
    )


@app.get("/videos/{filename}")
async def get_video(filename: str):
    """Serve a generated video."""
    video_path = _resolve_video_path(filename)
    media_type = _VIDEO_MEDIA_TYPES[video_path.suffix.lower()]
    return FileResponse(video_path, media_type=media_type)


@app.delete("/videos/{filename}")
async def delete_video(filename: str):
    """Delete a generated video (same filename rules as GET)."""
    video_path = _resolve_video_path(filename)

    if cleanup_video(str(video_path)):
        return {"success": True, "message": f"Video {filename} deleted"}
    raise HTTPException(status_code=500, detail="Failed to delete video")


# ============================================================================
# VRAM Management Endpoints
# ============================================================================


@app.post("/unload")
async def unload_model():
    """Unload the model from VRAM to free memory for other services."""
    await unload_pipeline()
    return {"success": True, "message": "Model unloaded, VRAM freed"}


# ============================================================================
# Maintenance Endpoints
# ============================================================================


@app.post("/cleanup")
async def cleanup_videos(max_age_hours: int = 24):
    """Clean up old generated videos."""
    cleaned = cleanup_old_videos(max_age_hours)
    return {"success": True, "cleaned": cleaned}


# ============================================================================
# Error Handler
# ============================================================================


@app.exception_handler(Exception)
async def global_exception_handler(request, exc):
    """Handle uncaught exceptions with a well-formed JSON body.

    The payload is serialised by JSONResponse so quotes, backslashes or
    newlines in the exception message cannot produce invalid JSON.
    """
    logger.error(f"Unhandled exception: {exc}", exc_info=exc)
    return JSONResponse(
        status_code=500,
        content={"error": "Internal server error", "detail": str(exc)},
    )


# ============================================================================
# Main
# ============================================================================


if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=PORT)
#!/bin/bash
# ============================================================================
# Mana Video Generation - Setup Script
# Installs LTX-Video for NVIDIA GPU (CUDA)
#
# Creates .venv next to this script, installs PyTorch (CUDA 12.1 wheels) and
# the packages from requirements.txt, then verifies the installation.
# ============================================================================

set -e

# .venv and requirements.txt are addressed relative to the service directory,
# so run from there no matter where the script was invoked.
cd "$(dirname "$0")"

echo "============================================"
echo "  Mana Video Generation - Setup"
echo "  Model: LTX-Video (~2B params)"
echo "  Requires: NVIDIA GPU with CUDA"
echo "============================================"
echo ""

# ---- Check NVIDIA GPU ----
echo "[1/4] Checking GPU..."

if command -v nvidia-smi &> /dev/null; then
    GPU_NAME=$(nvidia-smi --query-gpu=name --format=csv,noheader | head -1)
    GPU_MEM=$(nvidia-smi --query-gpu=memory.total --format=csv,noheader | head -1)
    echo "  GPU: $GPU_NAME ($GPU_MEM)"
else
    echo "  WARNING: nvidia-smi not found. CUDA may not be available."
    echo "  Install NVIDIA drivers and CUDA toolkit first."
fi

# ---- Python venv ----
echo ""
echo "[2/4] Setting up Python environment..."

if [ ! -d ".venv" ]; then
    python3 -m venv .venv
    echo "  Created virtual environment"
else
    echo "  Virtual environment already exists"
fi

source .venv/bin/activate
pip install --upgrade pip -q

# ---- Install dependencies ----
echo ""
echo "[3/4] Installing dependencies..."
echo "  This will download PyTorch + LTX-Video (~5 GB total)"

# Install PyTorch with CUDA support
pip install torch torchvision --index-url https://download.pytorch.org/whl/cu121 -q

# Install remaining dependencies
pip install -r requirements.txt -q

echo "  Dependencies installed"

# ---- Verify installation ----
echo ""
echo "[4/4] Verifying installation..."

# The device-properties attribute is total_memory; the former total_mem raised
# AttributeError and, because of set -e, aborted setup on every CUDA host.
python3 -c "
import torch
print(f'  PyTorch: {torch.__version__}')
print(f'  CUDA available: {torch.cuda.is_available()}')
if torch.cuda.is_available():
    print(f'  GPU: {torch.cuda.get_device_name(0)}')
    print(f'  VRAM: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB')

import diffusers
print(f'  Diffusers: {diffusers.__version__}')
print()
print('  LTX-Video model will be downloaded on first generation (~4 GB)')
"

# ---- Create output directory ----
mkdir -p /tmp/mana-video-gen

echo ""
echo "============================================"
echo "  Setup complete!"
echo ""
echo "  Start the service:"
echo "    source .venv/bin/activate"
echo "    uvicorn app.main:app --host 0.0.0.0 --port 3026 --reload"
echo ""
echo "  Test:"
echo "    curl http://localhost:3026/health"
echo "    curl -X POST http://localhost:3026/generate \\"
echo "      -H 'Content-Type: application/json' \\"
echo "      -d '{\"prompt\": \"A cat walking in a garden\"}'"
echo "============================================"