feat(mana-video-gen): add AI video generation service with LTX-Video
New GPU service for fast text-to-video generation using LTX-Video (~2B params) on the RTX 3090. Generates 480p clips in 10-30 seconds and uses ~10 GB VRAM. Includes a Cloudflare Tunnel route, Prometheus monitoring, and health checks.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
parent 28b953255b
commit 06107f6a52
13 changed files with 1017 additions and 0 deletions

@@ -183,6 +183,7 @@ manacore-monorepo/
│   ├── mana-media/          # Media platform: CAS, thumbnails (Hono + Bun)
│   ├── mana-llm/            # LLM abstraction (Python/FastAPI)
│   ├── mana-image-gen/      # AI image generation with FLUX (Python/FastAPI)
│   ├── mana-video-gen/      # AI video generation with LTX-Video (Python/FastAPI)
│   ├── mana-stt/            # Speech-to-text (Python/FastAPI)
│   ├── mana-tts/            # Text-to-speech (Python/FastAPI)
│   ├── mana-voice-bot/      # Voice assistant (Python/FastAPI)

@@ -920,6 +921,7 @@ Host mana-gpu
- Mana STT (Speech-to-Text, Port 3020)
- Mana TTS (Text-to-Speech, Port 3022)
- Mana Image Gen (FLUX image generation, Port 3023)
- Mana Video Gen (LTX-Video generation, Port 3026)

For setup documentation, see **[docs/WINDOWS_GPU_SERVER_SETUP.md](docs/WINDOWS_GPU_SERVER_SETUP.md)**.

@@ -119,6 +119,8 @@ ingress:
    service: http://192.168.178.11:3022
  - hostname: gpu-img.mana.how
    service: http://192.168.178.11:3023
  - hostname: gpu-video.mana.how
    service: http://192.168.178.11:3026
  - hostname: gpu-ollama.mana.how
    service: http://192.168.178.11:11434

@@ -203,6 +203,15 @@ scrape_configs:
    metrics_path: '/health'
    scrape_interval: 30s

  # GPU: Video Generation (LTX-Video)
  - job_name: 'gpu-video-gen'
    static_configs:
      - targets: ['192.168.178.11:3026']
        labels:
          instance: 'gpu-server'
    metrics_path: '/health'
    scrape_interval: 30s

  # ============================================
  # Go Infrastructure Services
  # ============================================

@@ -339,6 +348,7 @@ scrape_configs:
        - https://gpu-stt.mana.how
        - https://gpu-tts.mana.how
        - https://gpu-img.mana.how
        - https://gpu-video.mana.how
    relabel_configs:
      - source_labels: [__address__]
        target_label: __param_target

@@ -124,6 +124,7 @@ New-NetFirewallRule -DisplayName "Mana-STT" -Direction Inbound -LocalPort 3020 -
New-NetFirewallRule -DisplayName "Mana-TTS" -Direction Inbound -LocalPort 3022 -Protocol TCP -Action Allow
New-NetFirewallRule -DisplayName "Mana-Image-Gen" -Direction Inbound -LocalPort 3023 -Protocol TCP -Action Allow
New-NetFirewallRule -DisplayName "Mana-LLM" -Direction Inbound -LocalPort 3025 -Protocol TCP -Action Allow
New-NetFirewallRule -DisplayName "Mana-Video-Gen" -Direction Inbound -LocalPort 3026 -Protocol TCP -Action Allow
```

---

@@ -459,6 +460,7 @@ Internet → Cloudflare → Mac Mini (gpu-proxy.py) → GPU Server (LAN)
| mana-stt | `https://gpu-stt.mana.how` |
| mana-tts | `https://gpu-tts.mana.how` |
| mana-image-gen | `https://gpu-img.mana.how` |
| mana-video-gen | `https://gpu-video.mana.how` |
| Ollama | `https://gpu-ollama.mana.how` |

```bash

@@ -484,6 +486,12 @@ curl -X POST https://gpu-img.mana.how/generate \
  -H "Content-Type: application/json" \
  -d '{"prompt":"A cat","width":1024,"height":1024}'

# Video Generation (LTX-Video)
curl https://gpu-video.mana.how/health
curl -X POST https://gpu-video.mana.how/generate \
  -H "Content-Type: application/json" \
  -d '{"prompt":"Ocean waves crashing on rocks","width":704,"height":480}'

# Ollama directly
curl https://gpu-ollama.mana.how/api/tags
```

@@ -497,6 +505,7 @@ curl http://192.168.178.11:3025/health   # mana-llm
curl http://192.168.178.11:3020/health   # mana-stt
curl http://192.168.178.11:3022/health   # mana-tts
curl http://192.168.178.11:3023/health   # mana-image-gen
curl http://192.168.178.11:3026/health   # mana-video-gen
curl http://192.168.178.11:11434/api/tags   # Ollama
```

@@ -606,6 +615,7 @@ GPU Server (healthcheck.py → log-shipper.py)
| GPU STT | `gpu-stt` | 3020 (`/health`) |
| GPU TTS | `gpu-tts` | 3022 (`/health`) |
| GPU Image Gen | `gpu-image-gen` | 3023 (`/health`) |
| GPU Video Gen | `gpu-video-gen` | 3026 (`/health`) |

---

@@ -264,6 +264,7 @@ check_service "GPU Ollama" "http://192.168.178.11:11434/api/version" 3
check_service "GPU STT" "http://192.168.178.11:3020/health" 3
check_service "GPU TTS" "http://192.168.178.11:3022/health" 3
check_service "GPU Image Gen" "http://192.168.178.11:3023/health" 3
check_service "GPU Video Gen" "http://192.168.178.11:3026/health" 3

echo ""
echo "Matrix:"

@@ -103,6 +103,7 @@ check_gpu_service "Ollama (LLM)" "http://192.168.178.11:11434/api/version"
check_gpu_service "STT (Whisper)" "http://192.168.178.11:3020/health"
check_gpu_service "TTS" "http://192.168.178.11:3022/health"
check_gpu_service "Image Gen (FLUX)" "http://192.168.178.11:3023/health"
check_gpu_service "Video Gen (LTX)" "http://192.168.178.11:3026/health"

# ============================================
# Network/Tunnel Status

services/mana-video-gen/.env.example (new file, 28 lines)

@@ -0,0 +1,28 @@
# Mana Video Generation - Environment Variables

# Service
PORT=3026

# Model
LTX_MODEL_ID=Lightricks/LTX-Video
DEVICE=cuda

# Defaults
DEFAULT_WIDTH=704
DEFAULT_HEIGHT=480
DEFAULT_NUM_FRAMES=81
DEFAULT_FPS=25
DEFAULT_STEPS=30
DEFAULT_GUIDANCE_SCALE=7.5

# Limits
MAX_PROMPT_LENGTH=2000
MAX_FRAMES=161
MAX_STEPS=50
GENERATION_TIMEOUT=600

# Output
OUTPUT_DIR=/tmp/mana-video-gen

# CORS
CORS_ORIGINS=https://mana.how,https://picture.mana.how,https://chat.mana.how,http://localhost:5173

services/mana-video-gen/CLAUDE.md (new file, 172 lines)

@@ -0,0 +1,172 @@
# CLAUDE.md - Mana Video Generation Service

## Service Overview

AI video generation microservice using LTX-Video via HuggingFace diffusers:

- **Port**: 3026
- **Framework**: Python + FastAPI
- **Model**: LTX-Video (~2B params, Lightricks)
- **Backend**: diffusers + PyTorch CUDA
- **Target Hardware**: NVIDIA RTX 3090 (24 GB VRAM)

## Features

- **Fast generation**: 10-30 seconds per clip on RTX 3090
- **Text-to-video**: 480p-720p, up to ~6 seconds
- **Low VRAM**: ~10 GB — leaves room for other GPU services
- **Lazy model loading**: Model loads on first request, stays in VRAM
- **VRAM management**: POST /unload to free GPU memory for other services
- **MP4 output**: Direct video file serving

## Commands

```bash
# Setup (installs PyTorch CUDA + diffusers + LTX-Video)
chmod +x setup.sh && ./setup.sh

# Development
source .venv/bin/activate
uvicorn app.main:app --host 0.0.0.0 --port 3026 --reload

# Test
curl http://localhost:3026/health
curl -X POST http://localhost:3026/generate \
  -H "Content-Type: application/json" \
  -d '{"prompt": "A cat walking in a garden"}' | jq

# Free VRAM (e.g. before running image generation)
curl -X POST http://localhost:3026/unload
```

## File Structure

```
services/mana-video-gen/
├── app/
│   ├── __init__.py
│   ├── main.py          # FastAPI endpoints
│   └── ltx_service.py   # LTX-Video diffusers pipeline
├── setup.sh             # Setup script (CUDA + Python deps)
├── requirements.txt
├── .env.example
└── CLAUDE.md
```

## API Endpoints

| Endpoint | Method | Purpose |
|----------|--------|---------|
| `/health` | GET | Health check + GPU info |
| `/models` | GET | Model info |
| `/generate` | POST | Generate video from text prompt |
| `/videos/{filename}` | GET | Serve generated video |
| `/videos/{filename}` | DELETE | Delete video |
| `/unload` | POST | Unload model, free VRAM |
| `/cleanup` | POST | Clean old videos |

## Generate Request

```json
{
  "prompt": "A timelapse of a flower blooming",
  "negative_prompt": "blurry, low quality",
  "width": 704,
  "height": 480,
  "num_frames": 81,
  "fps": 25,
  "steps": 30,
  "guidance_scale": 7.5,
  "seed": null
}
```

## Generate Response

```json
{
  "success": true,
  "video_url": "/videos/abc123.mp4",
  "prompt": "A timelapse of a flower blooming",
  "width": 704,
  "height": 480,
  "num_frames": 81,
  "fps": 25,
  "duration": 3.24,
  "steps": 30,
  "seed": 42,
  "generation_time": 18.5
}
```
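
The same flow end to end, as a minimal Python client sketch (assumes the LAN address used elsewhere in these docs and `httpx` as the client library; error handling trimmed):

```python
# Minimal client sketch: generate a clip, then download it via /videos/.
# The LAN address matches the docs above; httpx on the caller side is an
# assumption, not part of the service itself.
import httpx

BASE = "http://192.168.178.11:3026"

resp = httpx.post(
    f"{BASE}/generate",
    json={"prompt": "A timelapse of a flower blooming", "num_frames": 81},
    timeout=600,  # generation is synchronous; allow up to GENERATION_TIMEOUT
)
resp.raise_for_status()
result = resp.json()

# video_url is relative (e.g. /videos/abc123.mp4); fetch and save the MP4.
video = httpx.get(f"{BASE}{result['video_url']}", timeout=60)
video.raise_for_status()
with open("clip.mp4", "wb") as f:
    f.write(video.content)
```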

## Environment Variables

| Variable | Default | Description |
|----------|---------|-------------|
| `PORT` | `3026` | Service port |
| `LTX_MODEL_ID` | `Lightricks/LTX-Video` | HuggingFace model ID |
| `DEVICE` | `cuda` | PyTorch device |
| `DEFAULT_WIDTH` | `704` | Default video width |
| `DEFAULT_HEIGHT` | `480` | Default video height |
| `DEFAULT_NUM_FRAMES` | `81` | Default frame count (~3.2s) |
| `DEFAULT_FPS` | `25` | Default framerate |
| `DEFAULT_STEPS` | `30` | Default inference steps |
| `DEFAULT_GUIDANCE_SCALE` | `7.5` | Default CFG scale |
| `GENERATION_TIMEOUT` | `600` | Timeout in seconds |
| `MAX_PROMPT_LENGTH` | `2000` | Max prompt chars |
| `MAX_FRAMES` | `161` | Max frames (~6.4s) |
| `CORS_ORIGINS` | (production URLs) | CORS config |

## Model Details

### LTX-Video

- **Parameters**: ~2 billion
- **License**: Lightricks Open License (commercial use allowed)
- **Download size**: ~4 GB (auto-downloaded on first use)
- **VRAM usage**: ~10 GB
- **Optimal settings**: 704x480, 30 steps, 7.5 guidance
- **Speed on RTX 3090**: 10-30 seconds per clip

## VRAM Management

The GPU server runs multiple AI services, and LTX-Video holds ~10 GB of VRAM (see the sketch after this list):

- Model loads lazily on first `/generate` request
- Use `POST /unload` to free VRAM when not generating videos
- Other services (mana-image-gen, mana-stt, mana-tts) share the same GPU
- `enable_model_cpu_offload()` moves unused layers to CPU automatically
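
A minimal coordination sketch, assuming `httpx` and the LAN addresses documented above (the helper name and the FLUX payload shape are illustrative, taken from the curl examples in this repo's docs):

```python
# Hypothetical helper: free LTX-Video's VRAM before a FLUX render on the
# shared RTX 3090. Hosts/ports match the docs; the rest is illustrative.
import httpx

VIDEO_GEN = "http://192.168.178.11:3026"
IMAGE_GEN = "http://192.168.178.11:3023"

def render_image_with_headroom(prompt: str) -> dict:
    # Drop the LTX-Video pipeline (~10 GB VRAM) if it is loaded.
    httpx.post(f"{VIDEO_GEN}/unload", timeout=30).raise_for_status()
    # mana-image-gen now has room to load FLUX.
    resp = httpx.post(
        f"{IMAGE_GEN}/generate",
        json={"prompt": prompt, "width": 1024, "height": 1024},
        timeout=300,
    )
    resp.raise_for_status()
    return resp.json()
```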

## Performance (RTX 3090)

| Resolution | Frames | Steps | Time |
|------------|--------|-------|------|
| 512x320 | 41 | 20 | ~8s |
| 704x480 | 81 | 30 | ~20s |
| 704x480 | 41 | 20 | ~10s |
| 1280x720 | 41 | 30 | ~45s |

## Integration

Used by:

- **Picture App** — video generation alongside images
- **Chat App** — inline video generation
- **Matrix Bots** — video generation via chat commands

### Example (TypeScript)

```typescript
const response = await fetch('http://192.168.178.11:3026/generate', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({
    prompt: 'Ocean waves crashing on rocks at sunset',
    width: 704,
    height: 480,
    num_frames: 81,
  }),
});

const result = await response.json();
const videoUrl = `http://192.168.178.11:3026${result.video_url}`;
```

services/mana-video-gen/app/__init__.py (new file, 1 line)

@@ -0,0 +1 @@
"""Mana Video Generation - AI Video Generation Microservice."""

services/mana-video-gen/app/ltx_service.py (new file, 277 lines)

@@ -0,0 +1,277 @@
"""
LTX-Video Service

Uses LTX-Video 0.9.x via HuggingFace diffusers for fast video generation.
Optimized for NVIDIA GPUs (CUDA).
"""

import asyncio
import logging
import os
import time
import uuid
from dataclasses import dataclass
from pathlib import Path
from typing import Optional

logger = logging.getLogger(__name__)

# Configuration
MODEL_ID = os.getenv("LTX_MODEL_ID", "Lightricks/LTX-Video")
DEFAULT_WIDTH = int(os.getenv("DEFAULT_WIDTH", "704"))
DEFAULT_HEIGHT = int(os.getenv("DEFAULT_HEIGHT", "480"))
DEFAULT_NUM_FRAMES = int(os.getenv("DEFAULT_NUM_FRAMES", "81"))  # ~3.2s at 25fps
DEFAULT_FPS = int(os.getenv("DEFAULT_FPS", "25"))
DEFAULT_STEPS = int(os.getenv("DEFAULT_STEPS", "30"))
DEFAULT_GUIDANCE_SCALE = float(os.getenv("DEFAULT_GUIDANCE_SCALE", "7.5"))
GENERATION_TIMEOUT = int(os.getenv("GENERATION_TIMEOUT", "600"))  # seconds
DEVICE = os.getenv("DEVICE", "cuda")

# Output directory for generated videos
OUTPUT_DIR = Path(os.getenv("OUTPUT_DIR", "/tmp/mana-video-gen"))
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# Pipeline singleton (lazy loaded)
_pipeline = None
_pipeline_lock = asyncio.Lock()


@dataclass
class GenerationResult:
    """Result of video generation."""

    video_path: str
    prompt: str
    width: int
    height: int
    num_frames: int
    fps: int
    steps: int
    seed: int
    generation_time: float


def is_model_available() -> bool:
    """Check if the model can be loaded (CUDA available + diffusers installed)."""
    try:
        import torch

        return torch.cuda.is_available()
    except ImportError:
        return False


def get_model_info() -> dict:
    """Get information about the LTX-Video model."""
    try:
        import torch

        gpu_name = torch.cuda.get_device_name(0) if torch.cuda.is_available() else "N/A"
        vram_gb = (
            round(torch.cuda.get_device_properties(0).total_memory / 1e9, 1)
            if torch.cuda.is_available()
            else 0
        )
    except ImportError:
        gpu_name = "N/A"
        vram_gb = 0

    return {
        "model_id": MODEL_ID,
        "model_name": "LTX-Video",
        "parameters": "~2B",
        "license": "Lightricks Open License (commercial OK)",
        "cuda_available": is_model_available(),
        "gpu": gpu_name,
        "vram_gb": vram_gb,
        "default_resolution": f"{DEFAULT_WIDTH}x{DEFAULT_HEIGHT}",
        "default_frames": DEFAULT_NUM_FRAMES,
        "default_fps": DEFAULT_FPS,
        "default_steps": DEFAULT_STEPS,
        "pipeline_loaded": _pipeline is not None,
    }


async def _load_pipeline():
    """Load the LTX-Video pipeline (lazy, thread-safe)."""
    global _pipeline

    async with _pipeline_lock:
        if _pipeline is not None:
            return _pipeline

        logger.info(f"Loading LTX-Video pipeline from {MODEL_ID}...")
        load_start = time.time()

        def _load():
            import torch
            from diffusers import LTXPipeline

            pipe = LTXPipeline.from_pretrained(
                MODEL_ID,
                torch_dtype=torch.bfloat16,
            )
            # Memory optimization: model offload handles device placement
            # itself, so the pipeline must not also be moved with pipe.to().
            pipe.enable_model_cpu_offload(device=DEVICE)
            return pipe

        loop = asyncio.get_running_loop()
        _pipeline = await loop.run_in_executor(None, _load)

        load_time = time.time() - load_start
        logger.info(f"LTX-Video pipeline loaded in {load_time:.1f}s")
        return _pipeline


async def unload_pipeline():
    """Unload pipeline to free VRAM."""
    global _pipeline

    async with _pipeline_lock:
        if _pipeline is not None:
            import torch

            _pipeline = None
            torch.cuda.empty_cache()
            logger.info("LTX-Video pipeline unloaded, VRAM freed")


async def generate_video(
    prompt: str,
    width: int = DEFAULT_WIDTH,
    height: int = DEFAULT_HEIGHT,
    num_frames: int = DEFAULT_NUM_FRAMES,
    fps: int = DEFAULT_FPS,
    steps: int = DEFAULT_STEPS,
    guidance_scale: float = DEFAULT_GUIDANCE_SCALE,
    seed: Optional[int] = None,
    negative_prompt: str = "",
) -> GenerationResult:
    """
    Generate a video from a text prompt using LTX-Video.

    Args:
        prompt: Text prompt for video generation
        width: Video width (default 704)
        height: Video height (default 480)
        num_frames: Number of frames (default 81 = ~3.2s at 25fps)
        fps: Frames per second for output (default 25)
        steps: Number of inference steps (default 30)
        guidance_scale: CFG scale (default 7.5)
        seed: Random seed (None for random)
        negative_prompt: Negative prompt

    Returns:
        GenerationResult with video path and metadata

    Raises:
        RuntimeError: If model not available or generation fails
    """
    if not is_model_available():
        raise RuntimeError("CUDA not available - cannot generate video")

    pipe = await _load_pipeline()

    video_id = str(uuid.uuid4())[:8]
    output_path = OUTPUT_DIR / f"{video_id}.mp4"

    import torch

    actual_seed = seed if seed is not None else torch.randint(0, 2**32, (1,)).item()
    generator = torch.Generator(device="cpu").manual_seed(actual_seed)

    logger.info(
        f"Generating video: {width}x{height}, {num_frames} frames, {steps} steps, seed={actual_seed}"
    )

    start_time = time.time()

    def _generate():
        result = pipe(
            prompt=prompt,
            negative_prompt=negative_prompt or None,
            width=width,
            height=height,
            num_frames=num_frames,
            num_inference_steps=steps,
            guidance_scale=guidance_scale,
            generator=generator,
        )
        return result

    try:
        loop = asyncio.get_running_loop()
        result = await asyncio.wait_for(
            loop.run_in_executor(None, _generate),
            timeout=GENERATION_TIMEOUT,
        )

        generation_time = time.time() - start_time

        # Export to MP4
        from diffusers.utils import export_to_video

        export_to_video(result.frames[0], str(output_path), fps=fps)

        if not output_path.exists():
            raise RuntimeError("Video generation completed but output file not found")

        file_size_mb = output_path.stat().st_size / (1024 * 1024)
        duration_s = num_frames / fps

        logger.info(
            f"Video generated: {output_path} ({width}x{height}, {duration_s:.1f}s, "
            f"{file_size_mb:.1f}MB, took {generation_time:.1f}s)"
        )

        return GenerationResult(
            video_path=str(output_path),
            prompt=prompt,
            width=width,
            height=height,
            num_frames=num_frames,
            fps=fps,
            steps=steps,
            seed=actual_seed,
            generation_time=generation_time,
        )

    except asyncio.TimeoutError:
        logger.error(f"Video generation timed out after {GENERATION_TIMEOUT}s")
        raise RuntimeError(f"Generation timed out after {GENERATION_TIMEOUT} seconds")
    except Exception as e:
        logger.error(f"Video generation error: {e}")
        raise


def cleanup_video(video_path: str) -> bool:
    """Delete a generated video file."""
    try:
        path = Path(video_path)
        if path.exists() and path.parent == OUTPUT_DIR:
            path.unlink()
            return True
    except Exception as e:
        logger.warning(f"Failed to cleanup video {video_path}: {e}")
    return False


def cleanup_old_videos(max_age_hours: int = 24) -> int:
    """Clean up videos older than max_age_hours."""
    cleaned = 0
    cutoff = time.time() - (max_age_hours * 3600)

    try:
        for file in OUTPUT_DIR.iterdir():
            if file.is_file() and file.stat().st_mtime < cutoff:
                file.unlink()
                cleaned += 1
    except Exception as e:
        logger.warning(f"Cleanup error: {e}")

    if cleaned > 0:
        logger.info(f"Cleaned up {cleaned} old videos")

    return cleaned

services/mana-video-gen/app/main.py (new file, 404 lines)

@@ -0,0 +1,404 @@
"""
Mana Video Generation - AI Video Generation Microservice

Provides video generation using LTX-Video via HuggingFace diffusers.
Optimized for NVIDIA RTX 3090 (CUDA).

API:
- POST /generate - Generate video from text prompt
- GET /health - Health check
- GET /models - Model information
- POST /unload - Free VRAM by unloading model
"""

import logging
import os
from contextlib import asynccontextmanager
from pathlib import Path
from typing import Optional

from fastapi import FastAPI, HTTPException, BackgroundTasks
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse, JSONResponse
from pydantic import BaseModel, Field

from .ltx_service import (
    generate_video,
    unload_pipeline,
    is_model_available,
    get_model_info,
    cleanup_video,
    cleanup_old_videos,
    DEFAULT_WIDTH,
    DEFAULT_HEIGHT,
    DEFAULT_NUM_FRAMES,
    DEFAULT_FPS,
    DEFAULT_STEPS,
    DEFAULT_GUIDANCE_SCALE,
)

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
)
logger = logging.getLogger(__name__)

# Configuration from environment
PORT = int(os.getenv("PORT", "3026"))
MAX_PROMPT_LENGTH = int(os.getenv("MAX_PROMPT_LENGTH", "2000"))
MIN_DIMENSION = int(os.getenv("MIN_DIMENSION", "256"))
MAX_DIMENSION = int(os.getenv("MAX_DIMENSION", "1280"))
MAX_FRAMES = int(os.getenv("MAX_FRAMES", "161"))  # ~6.4s at 25fps
MAX_STEPS = int(os.getenv("MAX_STEPS", "50"))
CORS_ORIGINS = os.getenv(
    "CORS_ORIGINS",
    "https://mana.how,https://picture.mana.how,https://chat.mana.how,http://localhost:5173",
).split(",")


@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan manager for startup/shutdown."""
    logger.info(f"Starting Mana Video Generation service on port {PORT}")

    if is_model_available():
        info = get_model_info()
        logger.info(f"CUDA available: {info['gpu']} ({info['vram_gb']} GB VRAM)")
    else:
        logger.warning("CUDA not available - service will return errors until GPU is accessible")

    # Cleanup old videos on startup
    cleanup_old_videos(max_age_hours=24)

    yield

    # Unload model on shutdown
    await unload_pipeline()
    logger.info("Shutting down Mana Video Generation service")


# Create FastAPI app
app = FastAPI(
    title="Mana Video Generation",
    description="AI video generation service using LTX-Video",
    version="1.0.0",
    lifespan=lifespan,
)

# CORS middleware
app.add_middleware(
    CORSMiddleware,
    allow_origins=CORS_ORIGINS,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


# ============================================================================
# Request/Response Models
# ============================================================================


class GenerateRequest(BaseModel):
    """Request for video generation."""

    prompt: str = Field(
        ...,
        description="Text prompt for video generation",
        min_length=1,
        max_length=2000,
    )
    negative_prompt: str = Field(
        "",
        description="Negative prompt (what to avoid)",
        max_length=1000,
    )
    width: int = Field(
        DEFAULT_WIDTH,
        ge=256,
        le=1280,
        description="Video width in pixels (must be divisible by 32)",
    )
    height: int = Field(
        DEFAULT_HEIGHT,
        ge=256,
        le=1280,
        description="Video height in pixels (must be divisible by 32)",
    )
    num_frames: int = Field(
        DEFAULT_NUM_FRAMES,
        ge=9,
        le=161,
        description="Number of frames (81 = ~3.2s at 25fps)",
    )
    fps: int = Field(
        DEFAULT_FPS,
        ge=8,
        le=30,
        description="Frames per second for output video",
    )
    steps: int = Field(
        DEFAULT_STEPS,
        ge=1,
        le=50,
        description="Number of inference steps",
    )
    guidance_scale: float = Field(
        DEFAULT_GUIDANCE_SCALE,
        ge=1.0,
        le=20.0,
        description="Classifier-free guidance scale",
    )
    seed: Optional[int] = Field(
        None,
        ge=0,
        description="Random seed (None for random)",
    )


class GenerateResponse(BaseModel):
    """Response for video generation."""

    success: bool
    video_url: str
    prompt: str
    width: int
    height: int
    num_frames: int
    fps: int
    duration: float
    steps: int
    seed: int
    generation_time: float


class HealthResponse(BaseModel):
    """Health check response."""

    status: str
    service: str
    cuda_available: bool
    gpu: str


class ModelsResponse(BaseModel):
    """Available models response."""

    ltx_video: dict


class ErrorResponse(BaseModel):
    """Error response."""

    error: str
    detail: str


# ============================================================================
# Health & Info Endpoints
# ============================================================================


@app.get("/health", response_model=HealthResponse)
async def health_check():
    """Check service health and CUDA availability."""
    info = get_model_info()
    return HealthResponse(
        status="healthy" if is_model_available() else "degraded",
        service="mana-video-gen",
        cuda_available=info["cuda_available"],
        gpu=info["gpu"],
    )


@app.get("/models", response_model=ModelsResponse)
async def get_models():
    """Get information about available models."""
    return ModelsResponse(ltx_video=get_model_info())


# ============================================================================
# Video Generation Endpoints
# ============================================================================


@app.post("/generate", response_model=GenerateResponse)
async def generate(request: GenerateRequest, background_tasks: BackgroundTasks):
    """
    Generate a video from a text prompt using LTX-Video.

    LTX-Video generates 480p video clips in 10-30 seconds on RTX 3090.
    The model is loaded on first request and stays in VRAM until /unload.
    """
    # Validate prompt
    if len(request.prompt) > MAX_PROMPT_LENGTH:
        raise HTTPException(
            status_code=400,
            detail=f"Prompt exceeds maximum length of {MAX_PROMPT_LENGTH} characters",
        )

    if not request.prompt.strip():
        raise HTTPException(status_code=400, detail="Prompt cannot be empty")

    # Validate dimensions are divisible by 32 (required by VAE)
    if request.width % 32 != 0:
        raise HTTPException(
            status_code=400,
            detail=f"Width must be divisible by 32 (got {request.width})",
        )
    if request.height % 32 != 0:
        raise HTTPException(
            status_code=400,
            detail=f"Height must be divisible by 32 (got {request.height})",
        )

    # Validate frames
    if request.num_frames > MAX_FRAMES:
        raise HTTPException(
            status_code=400,
            detail=f"num_frames must be at most {MAX_FRAMES}",
        )

    # Validate steps
    if request.steps > MAX_STEPS:
        raise HTTPException(
            status_code=400,
            detail=f"Steps must be at most {MAX_STEPS}",
        )

    # Check CUDA availability
    if not is_model_available():
        raise HTTPException(
            status_code=503,
            detail="Video generation service not available. CUDA not detected.",
        )

    try:
        result = await generate_video(
            prompt=request.prompt,
            negative_prompt=request.negative_prompt,
            width=request.width,
            height=request.height,
            num_frames=request.num_frames,
            fps=request.fps,
            steps=request.steps,
            guidance_scale=request.guidance_scale,
            seed=request.seed,
        )

        video_filename = Path(result.video_path).name
        video_url = f"/videos/{video_filename}"

        return GenerateResponse(
            success=True,
            video_url=video_url,
            prompt=result.prompt,
            width=result.width,
            height=result.height,
            num_frames=result.num_frames,
            fps=result.fps,
            duration=round(result.num_frames / result.fps, 2),
            steps=result.steps,
            seed=result.seed,
            generation_time=round(result.generation_time, 2),
        )

    except RuntimeError as e:
        logger.error(f"Generation error: {e}")
        raise HTTPException(status_code=500, detail=str(e))
    except Exception as e:
        logger.error(f"Unexpected error: {e}")
        raise HTTPException(status_code=500, detail=f"Video generation failed: {e}")


@app.get("/videos/{filename}")
async def get_video(filename: str):
    """Serve a generated video."""
    from .ltx_service import OUTPUT_DIR

    # Security: only allow specific extensions and no path traversal
    if ".." in filename or "/" in filename or "\\" in filename:
        raise HTTPException(status_code=400, detail="Invalid filename")

    allowed_extensions = {".mp4", ".webm"}
    ext = Path(filename).suffix.lower()
    if ext not in allowed_extensions:
        raise HTTPException(status_code=400, detail="Invalid file type")

    video_path = OUTPUT_DIR / filename
    if not video_path.exists():
        raise HTTPException(status_code=404, detail="Video not found")

    media_type = "video/mp4" if ext == ".mp4" else "video/webm"
    return FileResponse(video_path, media_type=media_type)


@app.delete("/videos/{filename}")
async def delete_video(filename: str):
    """Delete a generated video."""
    from .ltx_service import OUTPUT_DIR

    if ".." in filename or "/" in filename or "\\" in filename:
        raise HTTPException(status_code=400, detail="Invalid filename")

    video_path = OUTPUT_DIR / filename
    if not video_path.exists():
        raise HTTPException(status_code=404, detail="Video not found")

    if cleanup_video(str(video_path)):
        return {"success": True, "message": f"Video {filename} deleted"}
    else:
        raise HTTPException(status_code=500, detail="Failed to delete video")


# ============================================================================
# VRAM Management Endpoints
# ============================================================================


@app.post("/unload")
async def unload_model():
    """Unload the model from VRAM to free memory for other services."""
    await unload_pipeline()
    return {"success": True, "message": "Model unloaded, VRAM freed"}


# ============================================================================
# Maintenance Endpoints
# ============================================================================


@app.post("/cleanup")
async def cleanup_videos(max_age_hours: int = 24):
    """Clean up old generated videos."""
    cleaned = cleanup_old_videos(max_age_hours)
    return {"success": True, "cleaned": cleaned}


# ============================================================================
# Error Handler
# ============================================================================


@app.exception_handler(Exception)
async def global_exception_handler(request, exc):
    """Handle uncaught exceptions."""
    logger.error(f"Unhandled exception: {exc}")
    # Build the body with JSONResponse so the detail is properly escaped;
    # interpolating str(exc) into a raw f-string can produce invalid JSON.
    return JSONResponse(
        status_code=500,
        content={"error": "Internal server error", "detail": str(exc)},
    )


# ============================================================================
# Main
# ============================================================================


if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=PORT)

services/mana-video-gen/requirements.txt (new file, 20 lines)

@@ -0,0 +1,20 @@
# Web Framework
fastapi>=0.115.0
uvicorn[standard]>=0.34.0
python-multipart>=0.0.20

# ML / Video Generation
torch>=2.5.0
diffusers>=0.32.0
transformers>=4.46.0
accelerate>=1.2.0
sentencepiece>=0.2.0

# Video Processing
imageio[ffmpeg]>=2.36.0
pillow>=10.0.0
numpy>=1.26.0

# Utilities
aiofiles>=24.1.0
httpx>=0.27.0

services/mana-video-gen/setup.sh (new executable file, 89 lines)

@@ -0,0 +1,89 @@
#!/bin/bash
# ============================================================================
# Mana Video Generation - Setup Script
# Installs LTX-Video for NVIDIA GPU (CUDA)
# ============================================================================

set -e

echo "============================================"
echo " Mana Video Generation - Setup"
echo " Model: LTX-Video (~2B params)"
echo " Requires: NVIDIA GPU with CUDA"
echo "============================================"
echo ""

# ---- Check NVIDIA GPU ----
echo "[1/4] Checking GPU..."

if command -v nvidia-smi &> /dev/null; then
    GPU_NAME=$(nvidia-smi --query-gpu=name --format=csv,noheader | head -1)
    GPU_MEM=$(nvidia-smi --query-gpu=memory.total --format=csv,noheader | head -1)
    echo " GPU: $GPU_NAME ($GPU_MEM)"
else
    echo " WARNING: nvidia-smi not found. CUDA may not be available."
    echo " Install NVIDIA drivers and CUDA toolkit first."
fi

# ---- Python venv ----
echo ""
echo "[2/4] Setting up Python environment..."

if [ ! -d ".venv" ]; then
    python3 -m venv .venv
    echo " Created virtual environment"
else
    echo " Virtual environment already exists"
fi

source .venv/bin/activate
pip install --upgrade pip -q

# ---- Install dependencies ----
echo ""
echo "[3/4] Installing dependencies..."
echo " This will download PyTorch + LTX-Video (~5 GB total)"

# Install PyTorch with CUDA support
pip install torch torchvision --index-url https://download.pytorch.org/whl/cu121 -q

# Install remaining dependencies
pip install -r requirements.txt -q

echo " Dependencies installed"

# ---- Verify installation ----
echo ""
echo "[4/4] Verifying installation..."

python3 -c "
import torch
print(f' PyTorch: {torch.__version__}')
print(f' CUDA available: {torch.cuda.is_available()}')
if torch.cuda.is_available():
    print(f' GPU: {torch.cuda.get_device_name(0)}')
    print(f' VRAM: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB')

import diffusers
print(f' Diffusers: {diffusers.__version__}')
print()
print(' LTX-Video model will be downloaded on first generation (~4 GB)')
"

# ---- Create output directory ----
mkdir -p /tmp/mana-video-gen

echo ""
echo "============================================"
echo " Setup complete!"
echo ""
echo " Start the service:"
echo "   source .venv/bin/activate"
echo "   uvicorn app.main:app --host 0.0.0.0 --port 3026 --reload"
echo ""
echo " Test:"
echo "   curl http://localhost:3026/health"
echo "   curl -X POST http://localhost:3026/generate \\"
echo "     -H 'Content-Type: application/json' \\"
echo "     -d '{\"prompt\": \"A cat walking in a garden\"}'"
echo "============================================"