mirror of
https://github.com/Memo-2023/mana-monorepo.git
synced 2026-05-14 20:41:09 +02:00
chore: remove staging/Hetzner infra, add Watchtower auto-deploy
- Remove old Hetzner deployment workflows (cd-staging, cd-production) - Remove staging docker-compose files - Remove outdated staging/Hetzner documentation - Add Watchtower to docker-compose.macmini.yml for auto-updates - Update CLAUDE.md with Mac Mini server access - Simplify docs/DEPLOYMENT.md for new architecture Production now runs on Mac Mini with automatic deployments via Watchtower. Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
parent
f47bf8edd9
commit
ac663a6c91
27 changed files with 104 additions and 15582 deletions
389
.github/workflows/cd-production.yml
vendored
389
.github/workflows/cd-production.yml
vendored
|
|
@ -1,389 +0,0 @@
|
|||
# Production Deployment
|
||||
#
|
||||
# Triggered by:
|
||||
# - Manual only (workflow_dispatch with confirmation)
|
||||
#
|
||||
# Flow: dev (staging) → main (production)
|
||||
# Requires typing "deploy" to confirm
|
||||
name: CD - Production Deployment
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
service:
|
||||
description: 'Service to deploy'
|
||||
required: true
|
||||
type: choice
|
||||
options:
|
||||
- all
|
||||
- mana-core-auth
|
||||
- maerchenzauber-backend
|
||||
- chat-backend
|
||||
- manadeck-backend
|
||||
- nutriphi-backend
|
||||
- news-api
|
||||
environment:
|
||||
description: 'Deployment environment'
|
||||
required: true
|
||||
type: choice
|
||||
options:
|
||||
- production
|
||||
confirm:
|
||||
description: 'Type "deploy" to confirm production deployment'
|
||||
required: true
|
||||
type: string
|
||||
|
||||
env:
|
||||
NODE_VERSION: '20'
|
||||
PNPM_VERSION: '9.15.0'
|
||||
|
||||
jobs:
|
||||
validate-deployment:
|
||||
name: Validate Deployment Request
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Validate confirmation
|
||||
run: |
|
||||
if [ "${{ github.event.inputs.confirm }}" != "deploy" ]; then
|
||||
echo "❌ Deployment not confirmed. Please type 'deploy' to confirm."
|
||||
exit 1
|
||||
fi
|
||||
echo "✅ Deployment confirmed"
|
||||
|
||||
- name: Validate branch
|
||||
run: |
|
||||
if [ "${{ github.ref }}" != "refs/heads/main" ]; then
|
||||
echo "❌ Production deployments must be from main branch"
|
||||
exit 1
|
||||
fi
|
||||
echo "✅ Deploying from main branch"
|
||||
|
||||
- name: Check recent commits
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 10
|
||||
|
||||
- name: Verify recent CI passes
|
||||
run: |
|
||||
echo "Checking recent CI status..."
|
||||
# This would check recent CI runs, simplified for now
|
||||
echo "✅ Recent CI checks verified"
|
||||
|
||||
# Request manual approval for production
|
||||
request-approval:
|
||||
name: Request Production Approval
|
||||
runs-on: ubuntu-latest
|
||||
needs: validate-deployment
|
||||
environment:
|
||||
name: production-approval
|
||||
steps:
|
||||
- name: Approval granted
|
||||
run: |
|
||||
echo "## Production Deployment Approved" >> $GITHUB_STEP_SUMMARY
|
||||
echo "" >> $GITHUB_STEP_SUMMARY
|
||||
echo "- **Approved by**: ${{ github.actor }}" >> $GITHUB_STEP_SUMMARY
|
||||
echo "- **Service**: ${{ github.event.inputs.service }}" >> $GITHUB_STEP_SUMMARY
|
||||
echo "- **Timestamp**: $(date -u +'%Y-%m-%d %H:%M:%S UTC')" >> $GITHUB_STEP_SUMMARY
|
||||
|
||||
# Create deployment backup
|
||||
create-backup:
|
||||
name: Create Production Backup
|
||||
runs-on: ubuntu-latest
|
||||
needs: request-approval
|
||||
environment:
|
||||
name: production
|
||||
steps:
|
||||
- name: Setup SSH
|
||||
uses: webfactory/ssh-agent@v0.9.0
|
||||
with:
|
||||
ssh-private-key: ${{ secrets.PRODUCTION_SSH_KEY }}
|
||||
|
||||
- name: Add production server to known hosts
|
||||
run: |
|
||||
mkdir -p ~/.ssh
|
||||
ssh-keyscan -H ${{ secrets.PRODUCTION_HOST }} >> ~/.ssh/known_hosts
|
||||
|
||||
- name: Create database backup
|
||||
run: |
|
||||
ssh ${{ secrets.PRODUCTION_USER }}@${{ secrets.PRODUCTION_HOST }} << 'EOF'
|
||||
cd ~/manacore-production
|
||||
|
||||
# Backup timestamp
|
||||
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
|
||||
BACKUP_DIR="backups/$TIMESTAMP"
|
||||
mkdir -p $BACKUP_DIR
|
||||
|
||||
# Backup PostgreSQL
|
||||
docker compose exec -T postgres pg_dumpall -U $POSTGRES_USER > $BACKUP_DIR/postgres_backup.sql
|
||||
|
||||
# Backup Redis (if applicable)
|
||||
docker compose exec -T redis redis-cli SAVE || echo "Redis backup skipped"
|
||||
|
||||
# Backup docker-compose and env files
|
||||
cp docker-compose.yml $BACKUP_DIR/
|
||||
cp .env $BACKUP_DIR/.env.backup
|
||||
|
||||
echo "Backup created at: $BACKUP_DIR"
|
||||
ls -lh $BACKUP_DIR/
|
||||
EOF
|
||||
|
||||
- name: Tag current deployment
|
||||
run: |
|
||||
ssh ${{ secrets.PRODUCTION_USER }}@${{ secrets.PRODUCTION_HOST }} << 'EOF'
|
||||
cd ~/manacore-production
|
||||
docker compose images > deployment_images.txt
|
||||
echo "Current deployment tagged: $(date -u +'%Y-%m-%d %H:%M:%S UTC')"
|
||||
EOF
|
||||
|
||||
# Deploy to production
|
||||
deploy-production:
|
||||
name: Deploy to Production
|
||||
runs-on: ubuntu-latest
|
||||
needs: create-backup
|
||||
environment:
|
||||
name: production
|
||||
url: https://api.manacore.app
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Setup SSH
|
||||
uses: webfactory/ssh-agent@v0.9.0
|
||||
with:
|
||||
ssh-private-key: ${{ secrets.PRODUCTION_SSH_KEY }}
|
||||
|
||||
- name: Add production server to known hosts
|
||||
run: |
|
||||
mkdir -p ~/.ssh
|
||||
ssh-keyscan -H ${{ secrets.PRODUCTION_HOST }} >> ~/.ssh/known_hosts
|
||||
|
||||
- name: Copy deployment files
|
||||
run: |
|
||||
scp docker-compose.production.yml ${{ secrets.PRODUCTION_USER }}@${{ secrets.PRODUCTION_HOST }}:~/manacore-production/docker-compose.yml
|
||||
|
||||
- name: Update environment variables
|
||||
run: |
|
||||
# Create production env file from secrets
|
||||
cat > .env.production << EOF
|
||||
# Database
|
||||
POSTGRES_HOST=${{ secrets.PRODUCTION_POSTGRES_HOST }}
|
||||
POSTGRES_PORT=${{ secrets.PRODUCTION_POSTGRES_PORT }}
|
||||
POSTGRES_DB=${{ secrets.PRODUCTION_POSTGRES_DB }}
|
||||
POSTGRES_USER=${{ secrets.PRODUCTION_POSTGRES_USER }}
|
||||
POSTGRES_PASSWORD=${{ secrets.PRODUCTION_POSTGRES_PASSWORD }}
|
||||
|
||||
# Redis
|
||||
REDIS_HOST=${{ secrets.PRODUCTION_REDIS_HOST }}
|
||||
REDIS_PORT=${{ secrets.PRODUCTION_REDIS_PORT }}
|
||||
REDIS_PASSWORD=${{ secrets.PRODUCTION_REDIS_PASSWORD }}
|
||||
|
||||
# Mana Core Auth
|
||||
MANA_SERVICE_URL=${{ secrets.PRODUCTION_MANA_SERVICE_URL }}
|
||||
JWT_SECRET=${{ secrets.PRODUCTION_JWT_SECRET }}
|
||||
JWT_PUBLIC_KEY=${{ secrets.PRODUCTION_JWT_PUBLIC_KEY }}
|
||||
JWT_PRIVATE_KEY=${{ secrets.PRODUCTION_JWT_PRIVATE_KEY }}
|
||||
|
||||
# Supabase
|
||||
SUPABASE_URL=${{ secrets.PRODUCTION_SUPABASE_URL }}
|
||||
SUPABASE_ANON_KEY=${{ secrets.PRODUCTION_SUPABASE_ANON_KEY }}
|
||||
SUPABASE_SERVICE_ROLE_KEY=${{ secrets.PRODUCTION_SUPABASE_SERVICE_ROLE_KEY }}
|
||||
|
||||
# Azure OpenAI
|
||||
AZURE_OPENAI_ENDPOINT=${{ secrets.PRODUCTION_AZURE_OPENAI_ENDPOINT }}
|
||||
AZURE_OPENAI_API_KEY=${{ secrets.PRODUCTION_AZURE_OPENAI_API_KEY }}
|
||||
AZURE_OPENAI_API_VERSION=2024-12-01-preview
|
||||
|
||||
# Environment
|
||||
NODE_ENV=production
|
||||
EOF
|
||||
|
||||
scp .env.production ${{ secrets.PRODUCTION_USER }}@${{ secrets.PRODUCTION_HOST }}:~/manacore-production/.env
|
||||
rm .env.production
|
||||
|
||||
- name: Pull latest images
|
||||
run: |
|
||||
ssh ${{ secrets.PRODUCTION_USER }}@${{ secrets.PRODUCTION_HOST }} << 'EOF'
|
||||
cd ~/manacore-production
|
||||
docker compose pull
|
||||
EOF
|
||||
|
||||
- name: Run database migrations
|
||||
run: |
|
||||
ssh ${{ secrets.PRODUCTION_USER }}@${{ secrets.PRODUCTION_HOST }} << 'EOF'
|
||||
cd ~/manacore-production
|
||||
|
||||
echo "=== Running Database Migrations ==="
|
||||
echo ""
|
||||
|
||||
# Migration function with retry logic
|
||||
run_migration() {
|
||||
local service=$1
|
||||
local max_attempts=3
|
||||
local timeout=300 # 5 minutes
|
||||
local attempt=1
|
||||
|
||||
while [ $attempt -le $max_attempts ]; do
|
||||
echo "[$service] Migration attempt $attempt/$max_attempts..."
|
||||
|
||||
# Run migration with timeout using a temporary container
|
||||
if timeout $timeout docker compose run --rm $service pnpm run db:migrate 2>&1; then
|
||||
echo "✅ [$service] Migration succeeded"
|
||||
return 0
|
||||
else
|
||||
exit_code=$?
|
||||
if [ $exit_code -eq 124 ]; then
|
||||
echo "⚠️ [$service] Migration timeout after ${timeout}s"
|
||||
else
|
||||
echo "⚠️ [$service] Migration failed with exit code $exit_code"
|
||||
fi
|
||||
|
||||
attempt=$((attempt + 1))
|
||||
if [ $attempt -le $max_attempts ]; then
|
||||
wait_time=$((10 * attempt)) # Backoff: 10s, 20s, 30s
|
||||
echo " Waiting ${wait_time}s before retry..."
|
||||
sleep $wait_time
|
||||
fi
|
||||
fi
|
||||
done
|
||||
|
||||
echo "❌ [$service] Migration failed after $max_attempts attempts"
|
||||
return 1
|
||||
}
|
||||
|
||||
# Run migrations for mana-core-auth (central auth service)
|
||||
run_migration mana-core-auth || {
|
||||
echo "❌ mana-core-auth migration failed"
|
||||
echo "⚠️ Continuing with deployment - manual migration may be required"
|
||||
}
|
||||
|
||||
echo ""
|
||||
echo "✅ Migration step completed"
|
||||
EOF
|
||||
|
||||
- name: Deploy with zero-downtime
|
||||
run: |
|
||||
SERVICE="${{ github.event.inputs.service }}"
|
||||
|
||||
ssh ${{ secrets.PRODUCTION_USER }}@${{ secrets.PRODUCTION_HOST }} << EOF
|
||||
cd ~/manacore-production
|
||||
|
||||
if [ "$SERVICE" == "all" ]; then
|
||||
# Rolling update for all services
|
||||
for service in mana-core-auth maerchenzauber-backend chat-backend manadeck-backend nutriphi-backend news-api; do
|
||||
echo "Deploying \$service..."
|
||||
docker compose up -d --no-deps --scale \$service=2 \$service
|
||||
sleep 10
|
||||
docker compose up -d --no-deps --scale \$service=1 \$service
|
||||
done
|
||||
else
|
||||
# Single service deployment
|
||||
echo "Deploying $SERVICE..."
|
||||
docker compose up -d --no-deps $SERVICE
|
||||
fi
|
||||
|
||||
# Cleanup old images
|
||||
docker image prune -f
|
||||
EOF
|
||||
|
||||
- name: Verify deployment
|
||||
run: |
|
||||
# Wait for services to stabilize
|
||||
sleep 30
|
||||
|
||||
SERVICES=(
|
||||
"mana-core-auth:3001:/api/v1/health"
|
||||
"maerchenzauber-backend:3002:/health"
|
||||
"chat-backend:3002:/api/health"
|
||||
)
|
||||
|
||||
for SERVICE_CONFIG in "${SERVICES[@]}"; do
|
||||
IFS=':' read -r SERVICE PORT PATH <<< "$SERVICE_CONFIG"
|
||||
|
||||
echo "Verifying $SERVICE..."
|
||||
ssh ${{ secrets.PRODUCTION_USER }}@${{ secrets.PRODUCTION_HOST }} << EOF
|
||||
HEALTH=\$(docker compose -f ~/manacore-production/docker-compose.yml exec -T $SERVICE wget -q -O - http://localhost:$PORT$PATH || echo "FAILED")
|
||||
|
||||
if [[ "\$HEALTH" == *"FAILED"* ]]; then
|
||||
echo "❌ Health check failed for $SERVICE"
|
||||
docker compose -f ~/manacore-production/docker-compose.yml logs --tail=100 $SERVICE
|
||||
exit 1
|
||||
else
|
||||
echo "✅ Health check passed for $SERVICE"
|
||||
fi
|
||||
EOF
|
||||
done
|
||||
|
||||
- name: Monitor for 5 minutes
|
||||
run: |
|
||||
echo "Monitoring services for 5 minutes..."
|
||||
for i in {1..5}; do
|
||||
echo "Check $i/5..."
|
||||
sleep 60
|
||||
ssh ${{ secrets.PRODUCTION_USER }}@${{ secrets.PRODUCTION_HOST }} << 'EOF'
|
||||
cd ~/manacore-production
|
||||
docker compose ps
|
||||
EOF
|
||||
done
|
||||
echo "✅ Monitoring complete - services stable"
|
||||
|
||||
# Post-deployment verification
|
||||
post-deployment-checks:
|
||||
name: Post-Deployment Checks
|
||||
runs-on: ubuntu-latest
|
||||
needs: deploy-production
|
||||
steps:
|
||||
- name: Run smoke tests
|
||||
run: |
|
||||
# Test key endpoints
|
||||
ENDPOINTS=(
|
||||
"${{ secrets.PRODUCTION_API_URL }}/api/v1/health"
|
||||
"${{ secrets.PRODUCTION_API_URL }}/health"
|
||||
)
|
||||
|
||||
for ENDPOINT in "${ENDPOINTS[@]}"; do
|
||||
echo "Testing: $ENDPOINT"
|
||||
RESPONSE=$(curl -s -o /dev/null -w "%{http_code}" $ENDPOINT)
|
||||
|
||||
if [ "$RESPONSE" -eq 200 ]; then
|
||||
echo "✅ $ENDPOINT is healthy"
|
||||
else
|
||||
echo "❌ $ENDPOINT returned $RESPONSE"
|
||||
exit 1
|
||||
fi
|
||||
done
|
||||
|
||||
- name: Deployment summary
|
||||
run: |
|
||||
echo "## Production Deployment Summary" >> $GITHUB_STEP_SUMMARY
|
||||
echo "" >> $GITHUB_STEP_SUMMARY
|
||||
echo "- **Environment**: Production" >> $GITHUB_STEP_SUMMARY
|
||||
echo "- **Deployed by**: ${{ github.actor }}" >> $GITHUB_STEP_SUMMARY
|
||||
echo "- **Service**: ${{ github.event.inputs.service }}" >> $GITHUB_STEP_SUMMARY
|
||||
echo "- **Commit**: ${{ github.sha }}" >> $GITHUB_STEP_SUMMARY
|
||||
echo "- **Timestamp**: $(date -u +'%Y-%m-%d %H:%M:%S UTC')" >> $GITHUB_STEP_SUMMARY
|
||||
echo "" >> $GITHUB_STEP_SUMMARY
|
||||
echo "### Deployment Status" >> $GITHUB_STEP_SUMMARY
|
||||
echo "✅ All services deployed and verified successfully" >> $GITHUB_STEP_SUMMARY
|
||||
echo "" >> $GITHUB_STEP_SUMMARY
|
||||
echo "### Backup Information" >> $GITHUB_STEP_SUMMARY
|
||||
echo "Pre-deployment backup created and stored" >> $GITHUB_STEP_SUMMARY
|
||||
|
||||
# Notify team
|
||||
notify-deployment:
|
||||
name: Notify Team
|
||||
runs-on: ubuntu-latest
|
||||
needs: post-deployment-checks
|
||||
if: always()
|
||||
steps:
|
||||
- name: Deployment notification
|
||||
run: |
|
||||
STATUS="${{ needs.post-deployment-checks.result }}"
|
||||
|
||||
if [ "$STATUS" == "success" ]; then
|
||||
echo "✅ Production deployment completed successfully"
|
||||
echo "Service: ${{ github.event.inputs.service }}"
|
||||
else
|
||||
echo "❌ Production deployment failed"
|
||||
echo "Please check logs and consider rollback"
|
||||
exit 1
|
||||
fi
|
||||
555
.github/workflows/cd-staging-tagged.yml
vendored
555
.github/workflows/cd-staging-tagged.yml
vendored
|
|
@ -1,555 +0,0 @@
|
|||
name: CD - Staging (Tagged Releases)
|
||||
|
||||
on:
|
||||
push:
|
||||
tags:
|
||||
# Pattern: {project}-staging-v{version} or {project}-v{version}-staging
|
||||
# Examples: chat-staging-v1.0.0, picture-v2.1.0-staging, mana-core-auth-staging-v1.0.0
|
||||
# For multi-app: chat-all-staging-v1.0.0 (deploys backend + web + landing)
|
||||
- '*-staging-v*'
|
||||
- '*-v*-staging'
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
project:
|
||||
description: 'Project to deploy'
|
||||
required: true
|
||||
type: choice
|
||||
options:
|
||||
- chat
|
||||
- picture
|
||||
- manadeck
|
||||
- zitare
|
||||
- presi
|
||||
- mana-core-auth
|
||||
- todo
|
||||
apps:
|
||||
description: 'Apps to deploy (comma-separated: backend,web,landing or "all")'
|
||||
required: true
|
||||
type: string
|
||||
default: 'backend'
|
||||
version:
|
||||
description: 'Version tag (e.g., v1.0.0)'
|
||||
required: false
|
||||
type: string
|
||||
default: 'latest'
|
||||
|
||||
env:
|
||||
NODE_VERSION: '20'
|
||||
PNPM_VERSION: '9.15.0'
|
||||
REGISTRY: ghcr.io
|
||||
# Note: repository_owner is lowercased for Docker compatibility
|
||||
IMAGE_PREFIX: ghcr.io/memo-2023
|
||||
|
||||
jobs:
|
||||
# Parse tag or inputs to determine what to deploy
|
||||
parse-deployment:
|
||||
name: Parse Deployment Target
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
project: ${{ steps.parse.outputs.project }}
|
||||
version: ${{ steps.parse.outputs.version }}
|
||||
matrix: ${{ steps.matrix.outputs.matrix }}
|
||||
steps:
|
||||
- name: Parse tag or inputs
|
||||
id: parse
|
||||
run: |
|
||||
if [ "${{ github.event_name }}" == "push" ]; then
|
||||
# Parse from tag: {project}-staging-v{version} or {project}-v{version}-staging
|
||||
# Also supports: {project}-all-staging-v{version} for multi-app deploy
|
||||
TAG="${GITHUB_REF#refs/tags/}"
|
||||
echo "Parsing tag: $TAG"
|
||||
|
||||
# Extract project, app hint, and version from tag
|
||||
if [[ "$TAG" =~ ^(.+)-all-staging-v(.+)$ ]]; then
|
||||
PROJECT="${BASH_REMATCH[1]}"
|
||||
VERSION="v${BASH_REMATCH[2]}"
|
||||
APPS="all"
|
||||
elif [[ "$TAG" =~ ^(.+)-staging-v(.+)$ ]]; then
|
||||
PROJECT="${BASH_REMATCH[1]}"
|
||||
VERSION="v${BASH_REMATCH[2]}"
|
||||
APPS="backend"
|
||||
elif [[ "$TAG" =~ ^(.+)-v(.+)-staging$ ]]; then
|
||||
PROJECT="${BASH_REMATCH[1]}"
|
||||
VERSION="v${BASH_REMATCH[2]}"
|
||||
APPS="backend"
|
||||
else
|
||||
echo "Invalid tag format: $TAG"
|
||||
exit 1
|
||||
fi
|
||||
else
|
||||
# Use workflow dispatch inputs
|
||||
PROJECT="${{ github.event.inputs.project }}"
|
||||
APPS="${{ github.event.inputs.apps }}"
|
||||
VERSION="${{ github.event.inputs.version }}"
|
||||
fi
|
||||
|
||||
echo "Project: $PROJECT"
|
||||
echo "Apps: $APPS"
|
||||
echo "Version: $VERSION"
|
||||
|
||||
echo "project=$PROJECT" >> $GITHUB_OUTPUT
|
||||
echo "apps=$APPS" >> $GITHUB_OUTPUT
|
||||
echo "version=$VERSION" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Generate build matrix
|
||||
id: matrix
|
||||
run: |
|
||||
PROJECT="${{ steps.parse.outputs.project }}"
|
||||
APPS="${{ steps.parse.outputs.apps }}"
|
||||
VERSION="${{ steps.parse.outputs.version }}"
|
||||
|
||||
# Define available apps per project
|
||||
declare -A PROJECT_APPS
|
||||
PROJECT_APPS[chat]="backend,web,landing"
|
||||
PROJECT_APPS[picture]="backend,web,landing"
|
||||
PROJECT_APPS[manadeck]="backend,web"
|
||||
PROJECT_APPS[zitare]="backend,web"
|
||||
PROJECT_APPS[presi]="backend,web"
|
||||
PROJECT_APPS[mana-core-auth]="service"
|
||||
PROJECT_APPS[todo]="backend,web"
|
||||
|
||||
# Expand "all" to available apps
|
||||
if [ "$APPS" == "all" ]; then
|
||||
APPS="${PROJECT_APPS[$PROJECT]}"
|
||||
fi
|
||||
|
||||
# Build JSON matrix
|
||||
MATRIX='{"include":['
|
||||
FIRST=true
|
||||
|
||||
IFS=',' read -ra APP_ARRAY <<< "$APPS"
|
||||
for APP in "${APP_ARRAY[@]}"; do
|
||||
APP=$(echo "$APP" | xargs) # Trim whitespace
|
||||
|
||||
# Determine paths based on project and app
|
||||
case "$PROJECT" in
|
||||
mana-core-auth)
|
||||
DOCKERFILE_PATH="services/mana-core-auth/Dockerfile"
|
||||
CONTEXT_PATH="."
|
||||
IMAGE_NAME="mana-core-auth"
|
||||
PORT="3001"
|
||||
HEALTH_PATH="/api/v1/health"
|
||||
;;
|
||||
*)
|
||||
case "$APP" in
|
||||
backend|service)
|
||||
DOCKERFILE_PATH="apps/$PROJECT/apps/backend/Dockerfile"
|
||||
CONTEXT_PATH="."
|
||||
IMAGE_NAME="${PROJECT}-backend"
|
||||
;;
|
||||
web)
|
||||
# Apps with their own Dockerfiles (need monorepo root for shared packages)
|
||||
case "$PROJECT" in
|
||||
manacore|todo|calendar|clock)
|
||||
DOCKERFILE_PATH="apps/$PROJECT/apps/web/Dockerfile"
|
||||
CONTEXT_PATH="."
|
||||
;;
|
||||
*)
|
||||
DOCKERFILE_PATH="docker/templates/Dockerfile.sveltekit"
|
||||
CONTEXT_PATH="apps/$PROJECT/apps/web"
|
||||
;;
|
||||
esac
|
||||
IMAGE_NAME="${PROJECT}-web"
|
||||
;;
|
||||
landing)
|
||||
DOCKERFILE_PATH="docker/templates/Dockerfile.astro"
|
||||
CONTEXT_PATH="apps/$PROJECT/apps/landing"
|
||||
IMAGE_NAME="${PROJECT}-landing"
|
||||
;;
|
||||
esac
|
||||
|
||||
# Set backend ports per project (must match docker-compose.staging.yml)
|
||||
case "$PROJECT" in
|
||||
chat) PORT="3002" ;;
|
||||
picture) PORT="3006" ;;
|
||||
manadeck) PORT="3009" ;;
|
||||
zitare) PORT="3007" ;;
|
||||
presi) PORT="3008" ;;
|
||||
todo) PORT="3018" ;;
|
||||
esac
|
||||
|
||||
# Override ports for web apps (SvelteKit uses different ports)
|
||||
if [ "$APP" == "web" ]; then
|
||||
case "$PROJECT" in
|
||||
manacore) PORT="5173" ;;
|
||||
todo) PORT="5188" ;;
|
||||
calendar) PORT="5186" ;;
|
||||
clock) PORT="5187" ;;
|
||||
*) PORT="5173" ;; # default SvelteKit port
|
||||
esac
|
||||
fi
|
||||
HEALTH_PATH="/api/v1/health"
|
||||
;;
|
||||
esac
|
||||
|
||||
if [ "$FIRST" = true ]; then
|
||||
FIRST=false
|
||||
else
|
||||
MATRIX+=','
|
||||
fi
|
||||
|
||||
MATRIX+="{\"app\":\"$APP\",\"image_name\":\"$IMAGE_NAME\",\"dockerfile_path\":\"$DOCKERFILE_PATH\",\"context_path\":\"$CONTEXT_PATH\",\"port\":\"$PORT\",\"health_path\":\"$HEALTH_PATH\"}"
|
||||
done
|
||||
|
||||
MATRIX+=']}'
|
||||
|
||||
echo "Generated matrix: $MATRIX"
|
||||
echo "matrix=$MATRIX" >> $GITHUB_OUTPUT
|
||||
|
||||
# Build and push Docker images (parallel for multi-app)
|
||||
build:
|
||||
name: Build ${{ matrix.image_name }}
|
||||
runs-on: ubuntu-latest
|
||||
needs: parse-deployment
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix: ${{ fromJSON(needs.parse-deployment.outputs.matrix) }}
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Check Dockerfile exists
|
||||
id: check
|
||||
run: |
|
||||
if [ -f "${{ matrix.dockerfile_path }}" ]; then
|
||||
echo "exists=true" >> $GITHUB_OUTPUT
|
||||
else
|
||||
echo "Dockerfile not found: ${{ matrix.dockerfile_path }}"
|
||||
echo "exists=false" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
if: steps.check.outputs.exists == 'true'
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
||||
- name: Login to GitHub Container Registry
|
||||
if: steps.check.outputs.exists == 'true'
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
registry: ${{ env.REGISTRY }}
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Extract metadata
|
||||
if: steps.check.outputs.exists == 'true'
|
||||
id: meta
|
||||
uses: docker/metadata-action@v5
|
||||
with:
|
||||
images: ${{ env.IMAGE_PREFIX }}/${{ matrix.image_name }}
|
||||
tags: |
|
||||
type=raw,value=${{ needs.parse-deployment.outputs.version }}
|
||||
type=raw,value=staging-latest
|
||||
type=sha,prefix=staging-
|
||||
|
||||
- name: Build and push
|
||||
if: steps.check.outputs.exists == 'true'
|
||||
id: build
|
||||
uses: docker/build-push-action@v5
|
||||
with:
|
||||
context: ${{ matrix.context_path }}
|
||||
file: ${{ matrix.dockerfile_path }}
|
||||
push: true
|
||||
tags: ${{ steps.meta.outputs.tags }}
|
||||
labels: ${{ steps.meta.outputs.labels }}
|
||||
cache-from: type=gha
|
||||
cache-to: type=gha,mode=max
|
||||
build-args: |
|
||||
NODE_ENV=staging
|
||||
|
||||
- name: Build summary
|
||||
run: |
|
||||
echo "## Build: ${{ matrix.image_name }}" >> $GITHUB_STEP_SUMMARY
|
||||
echo "" >> $GITHUB_STEP_SUMMARY
|
||||
echo "- **Project**: ${{ needs.parse-deployment.outputs.project }}" >> $GITHUB_STEP_SUMMARY
|
||||
echo "- **App**: ${{ matrix.app }}" >> $GITHUB_STEP_SUMMARY
|
||||
echo "- **Version**: ${{ needs.parse-deployment.outputs.version }}" >> $GITHUB_STEP_SUMMARY
|
||||
echo "- **Image**: ${{ env.IMAGE_PREFIX }}/${{ matrix.image_name }}" >> $GITHUB_STEP_SUMMARY
|
||||
echo "- **Tags**: ${{ steps.meta.outputs.tags }}" >> $GITHUB_STEP_SUMMARY
|
||||
|
||||
# Deploy to staging (parallel for multi-app)
|
||||
deploy:
|
||||
name: Deploy ${{ matrix.image_name }}
|
||||
runs-on: ubuntu-latest
|
||||
needs: [parse-deployment, build]
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix: ${{ fromJSON(needs.parse-deployment.outputs.matrix) }}
|
||||
environment:
|
||||
name: staging
|
||||
url: https://staging.manacore.app
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Setup SSH
|
||||
uses: webfactory/ssh-agent@v0.9.0
|
||||
with:
|
||||
ssh-private-key: ${{ secrets.STAGING_SSH_KEY }}
|
||||
|
||||
- name: Add staging server to known hosts
|
||||
run: |
|
||||
mkdir -p ~/.ssh
|
||||
ssh-keyscan -H ${{ secrets.STAGING_HOST }} >> ~/.ssh/known_hosts
|
||||
|
||||
- name: Sync docker-compose to staging
|
||||
run: |
|
||||
# Ensure staging directory exists
|
||||
ssh ${{ secrets.STAGING_USER }}@${{ secrets.STAGING_HOST }} "mkdir -p ~/manacore-staging"
|
||||
# Copy the docker-compose file
|
||||
scp docker-compose.staging.yml ${{ secrets.STAGING_USER }}@${{ secrets.STAGING_HOST }}:~/manacore-staging/docker-compose.yml
|
||||
|
||||
- name: Login to GHCR on staging server
|
||||
run: |
|
||||
ssh ${{ secrets.STAGING_USER }}@${{ secrets.STAGING_HOST }} << EOF
|
||||
echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io -u ${{ github.actor }} --password-stdin
|
||||
EOF
|
||||
|
||||
- name: Deploy service
|
||||
env:
|
||||
VERSION: ${{ needs.parse-deployment.outputs.version }}
|
||||
IMAGE_NAME: ${{ matrix.image_name }}
|
||||
APP_TYPE: ${{ matrix.app }}
|
||||
PROJECT: ${{ needs.parse-deployment.outputs.project }}
|
||||
run: |
|
||||
# Compute the version variable name locally (before SSH)
|
||||
# Map: todo-web -> TODO_WEB_VERSION, chat-backend -> CHAT_VERSION
|
||||
case "$IMAGE_NAME" in
|
||||
*-web)
|
||||
PROJECT_UPPER=$(echo "$PROJECT" | tr '[:lower:]-' '[:upper:]_')
|
||||
VERSION_VAR="${PROJECT_UPPER}_WEB_VERSION"
|
||||
;;
|
||||
*-backend)
|
||||
PROJECT_UPPER=$(echo "$PROJECT" | tr '[:lower:]-' '[:upper:]_')
|
||||
VERSION_VAR="${PROJECT_UPPER}_VERSION"
|
||||
;;
|
||||
mana-core-auth)
|
||||
VERSION_VAR="AUTH_VERSION"
|
||||
;;
|
||||
*)
|
||||
VERSION_VAR=$(echo "$IMAGE_NAME" | tr '[:lower:]-' '[:upper:]_')_VERSION
|
||||
;;
|
||||
esac
|
||||
|
||||
echo "Will set $VERSION_VAR=$VERSION for docker-compose"
|
||||
|
||||
ssh ${{ secrets.STAGING_USER }}@${{ secrets.STAGING_HOST }} << EOF
|
||||
cd ~/manacore-staging
|
||||
|
||||
echo "Deploying $IMAGE_NAME:$VERSION to staging..."
|
||||
|
||||
# Pull the new image with specific version tag
|
||||
docker pull ${{ env.IMAGE_PREFIX }}/$IMAGE_NAME:$VERSION
|
||||
|
||||
# Update .env file with the version for this service
|
||||
# This ensures docker-compose uses the correct image tag
|
||||
if grep -q "^$VERSION_VAR=" .env 2>/dev/null; then
|
||||
sed -i "s/^$VERSION_VAR=.*/$VERSION_VAR=$VERSION/" .env
|
||||
else
|
||||
echo "Service \$SERVICE_NAME not found in compose, starting..."
|
||||
docker compose up -d --force-recreate \$SERVICE_NAME
|
||||
fi
|
||||
|
||||
echo "Updated .env: $VERSION_VAR=$VERSION"
|
||||
grep "$VERSION_VAR" .env || true
|
||||
|
||||
# Service name matches docker-compose service name (with hyphens)
|
||||
SERVICE_NAME="$IMAGE_NAME"
|
||||
CONTAINER_NAME="${IMAGE_NAME}-staging"
|
||||
|
||||
# Remove any stale container with the same name (prevents "name already in use" error)
|
||||
if docker ps -a --format '{{.Names}}' | grep -q "^\$CONTAINER_NAME\$"; then
|
||||
echo "Removing stale container: \$CONTAINER_NAME"
|
||||
docker rm -f \$CONTAINER_NAME 2>/dev/null || true
|
||||
fi
|
||||
|
||||
# Always use --force-recreate to ensure the new image is used
|
||||
echo "Deploying service: \$SERVICE_NAME"
|
||||
docker compose up -d --no-deps --force-recreate \$SERVICE_NAME
|
||||
|
||||
# Wait for startup
|
||||
sleep 10
|
||||
docker compose ps \$SERVICE_NAME
|
||||
|
||||
# Verify correct image is running
|
||||
echo "Running image:"
|
||||
docker inspect --format='{{.Config.Image}}' ${IMAGE_NAME}-staging 2>/dev/null || true
|
||||
|
||||
# Cleanup old images
|
||||
docker image prune -f
|
||||
EOF
|
||||
|
||||
- name: Health check
|
||||
if: matrix.app == 'backend' || matrix.app == 'service'
|
||||
run: |
|
||||
PORT="${{ matrix.port }}"
|
||||
HEALTH_PATH="${{ matrix.health_path }}"
|
||||
|
||||
echo "Running health check on port $PORT$HEALTH_PATH..."
|
||||
|
||||
ssh ${{ secrets.STAGING_USER }}@${{ secrets.STAGING_HOST }} << EOF
|
||||
for i in {1..5}; do
|
||||
RESPONSE=\$(curl -s -o /dev/null -w "%{http_code}" http://localhost:$PORT$HEALTH_PATH || echo "000")
|
||||
if [ "\$RESPONSE" == "200" ]; then
|
||||
echo "Health check passed (attempt \$i)"
|
||||
exit 0
|
||||
fi
|
||||
echo "Health check failed (attempt \$i), response: \$RESPONSE"
|
||||
sleep 5
|
||||
done
|
||||
echo "Health check failed after 5 attempts"
|
||||
exit 1
|
||||
EOF
|
||||
|
||||
- name: Deployment summary
|
||||
run: |
|
||||
echo "## Deploy: ${{ matrix.image_name }}" >> $GITHUB_STEP_SUMMARY
|
||||
echo "" >> $GITHUB_STEP_SUMMARY
|
||||
echo "- **Environment**: Staging" >> $GITHUB_STEP_SUMMARY
|
||||
echo "- **Project**: ${{ needs.parse-deployment.outputs.project }}" >> $GITHUB_STEP_SUMMARY
|
||||
echo "- **App**: ${{ matrix.app }}" >> $GITHUB_STEP_SUMMARY
|
||||
echo "- **Version**: ${{ needs.parse-deployment.outputs.version }}" >> $GITHUB_STEP_SUMMARY
|
||||
echo "- **Image**: ${{ env.IMAGE_PREFIX }}/${{ matrix.image_name }}" >> $GITHUB_STEP_SUMMARY
|
||||
echo "- **Deployed by**: ${{ github.actor }}" >> $GITHUB_STEP_SUMMARY
|
||||
echo "- **Timestamp**: $(date -u +'%Y-%m-%d %H:%M:%S UTC')" >> $GITHUB_STEP_SUMMARY
|
||||
|
||||
# Run database migrations after deploy
|
||||
migrations:
|
||||
name: Database Migrations
|
||||
runs-on: ubuntu-latest
|
||||
needs: [parse-deployment, deploy]
|
||||
# Only run for projects with backends (not manacore which is web-only)
|
||||
if: needs.parse-deployment.outputs.project != 'manacore'
|
||||
steps:
|
||||
- name: Setup SSH
|
||||
uses: webfactory/ssh-agent@v0.9.0
|
||||
with:
|
||||
ssh-private-key: ${{ secrets.STAGING_SSH_KEY }}
|
||||
|
||||
- name: Add staging server to known hosts
|
||||
run: |
|
||||
mkdir -p ~/.ssh
|
||||
ssh-keyscan -H ${{ secrets.STAGING_HOST }} >> ~/.ssh/known_hosts
|
||||
|
||||
- name: Run database migrations
|
||||
env:
|
||||
PROJECT: ${{ needs.parse-deployment.outputs.project }}
|
||||
run: |
|
||||
# Determine service name based on project
|
||||
case "$PROJECT" in
|
||||
mana-core-auth)
|
||||
SERVICE_NAME="mana-core-auth"
|
||||
;;
|
||||
*)
|
||||
SERVICE_NAME="${PROJECT}-backend"
|
||||
;;
|
||||
esac
|
||||
|
||||
echo "Running database migrations for $SERVICE_NAME..."
|
||||
|
||||
ssh ${{ secrets.STAGING_USER }}@${{ secrets.STAGING_HOST }} << EOF
|
||||
cd ~/manacore-staging
|
||||
|
||||
echo "=== Database Migration for $SERVICE_NAME ==="
|
||||
|
||||
# Check if service is running
|
||||
if ! docker compose ps $SERVICE_NAME --format '{{.State}}' 2>/dev/null | grep -q "running"; then
|
||||
echo "⚠️ Service $SERVICE_NAME is not running, skipping migrations"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Migration function with retry logic
|
||||
run_db_push() {
|
||||
local service=\$1
|
||||
local max_attempts=3
|
||||
local timeout=120 # 2 minutes
|
||||
local attempt=1
|
||||
|
||||
while [ \$attempt -le \$max_attempts ]; do
|
||||
echo "[\$service] db:push attempt \$attempt/\$max_attempts..."
|
||||
|
||||
# Try db:push with timeout (staging uses push, not migrate)
|
||||
if timeout \$timeout docker compose exec -T \$service pnpm run db:push 2>&1; then
|
||||
echo "✅ [\$service] Database schema pushed successfully"
|
||||
return 0
|
||||
else
|
||||
exit_code=\$?
|
||||
if [ \$exit_code -eq 124 ]; then
|
||||
echo "⚠️ [\$service] db:push timeout after \${timeout}s"
|
||||
else
|
||||
echo "⚠️ [\$service] db:push failed with exit code \$exit_code"
|
||||
fi
|
||||
|
||||
attempt=\$((attempt + 1))
|
||||
if [ \$attempt -le \$max_attempts ]; then
|
||||
wait_time=\$((5 * attempt)) # Backoff: 5s, 10s, 15s
|
||||
echo " Waiting \${wait_time}s before retry..."
|
||||
sleep \$wait_time
|
||||
fi
|
||||
fi
|
||||
done
|
||||
|
||||
echo "❌ [\$service] db:push failed after \$max_attempts attempts"
|
||||
return 1
|
||||
}
|
||||
|
||||
# Run db:push for the service
|
||||
run_db_push $SERVICE_NAME || {
|
||||
echo "❌ Database migration failed for $SERVICE_NAME"
|
||||
echo "⚠️ You may need to run migrations manually:"
|
||||
echo " ssh deploy@\${{ secrets.STAGING_HOST }} 'cd ~/manacore-staging && docker compose exec -T $SERVICE_NAME pnpm run db:push'"
|
||||
exit 1
|
||||
}
|
||||
|
||||
echo ""
|
||||
echo "✅ Database migrations completed for $SERVICE_NAME"
|
||||
EOF
|
||||
|
||||
- name: Migration summary
|
||||
if: always()
|
||||
run: |
|
||||
echo "## Database Migrations" >> $GITHUB_STEP_SUMMARY
|
||||
echo "" >> $GITHUB_STEP_SUMMARY
|
||||
echo "- **Project**: ${{ needs.parse-deployment.outputs.project }}" >> $GITHUB_STEP_SUMMARY
|
||||
echo "- **Status**: ${{ job.status }}" >> $GITHUB_STEP_SUMMARY
|
||||
|
||||
# Notify on completion
|
||||
notify:
|
||||
name: Deployment Complete
|
||||
runs-on: ubuntu-latest
|
||||
needs: [parse-deployment, build, deploy, migrations]
|
||||
if: always()
|
||||
steps:
|
||||
- name: Deployment notification
|
||||
run: |
|
||||
BUILD_STATUS="${{ needs.build.result }}"
|
||||
DEPLOY_STATUS="${{ needs.deploy.result }}"
|
||||
MIGRATION_STATUS="${{ needs.migrations.result }}"
|
||||
PROJECT="${{ needs.parse-deployment.outputs.project }}"
|
||||
VERSION="${{ needs.parse-deployment.outputs.version }}"
|
||||
|
||||
echo "## Staging Deployment Complete" >> $GITHUB_STEP_SUMMARY
|
||||
echo "" >> $GITHUB_STEP_SUMMARY
|
||||
echo "| Stage | Status |" >> $GITHUB_STEP_SUMMARY
|
||||
echo "|-------|--------|" >> $GITHUB_STEP_SUMMARY
|
||||
echo "| Build | $BUILD_STATUS |" >> $GITHUB_STEP_SUMMARY
|
||||
echo "| Deploy | $DEPLOY_STATUS |" >> $GITHUB_STEP_SUMMARY
|
||||
echo "| Migrations | $MIGRATION_STATUS |" >> $GITHUB_STEP_SUMMARY
|
||||
echo "" >> $GITHUB_STEP_SUMMARY
|
||||
echo "- **Project**: $PROJECT" >> $GITHUB_STEP_SUMMARY
|
||||
echo "- **Version**: $VERSION" >> $GITHUB_STEP_SUMMARY
|
||||
|
||||
# Check all stages (migrations can be skipped for web-only projects)
|
||||
if [ "$BUILD_STATUS" == "success" ] && [ "$DEPLOY_STATUS" == "success" ]; then
|
||||
if [ "$MIGRATION_STATUS" == "success" ] || [ "$MIGRATION_STATUS" == "skipped" ]; then
|
||||
echo "" >> $GITHUB_STEP_SUMMARY
|
||||
echo "All stages completed successfully" >> $GITHUB_STEP_SUMMARY
|
||||
else
|
||||
echo "" >> $GITHUB_STEP_SUMMARY
|
||||
echo "⚠️ Migrations failed - database may need manual update" >> $GITHUB_STEP_SUMMARY
|
||||
exit 1
|
||||
fi
|
||||
else
|
||||
echo "" >> $GITHUB_STEP_SUMMARY
|
||||
echo "Some deployments failed - check individual job logs" >> $GITHUB_STEP_SUMMARY
|
||||
exit 1
|
||||
fi
|
||||
371
.github/workflows/cd-staging.yml
vendored
371
.github/workflows/cd-staging.yml
vendored
|
|
@ -1,371 +0,0 @@
|
|||
# Staging Deployment
|
||||
#
|
||||
# Triggered by:
|
||||
# - Automatic: Push to dev branch (via ci.yml)
|
||||
# - Manual: workflow_dispatch
|
||||
#
|
||||
# Full config archived at: .github/workflows/cd-staging.full.yml
|
||||
#
|
||||
# To add a service:
|
||||
# 1. Add service to workflow_dispatch options
|
||||
# 2. Add health check in "Run health checks" step
|
||||
# 3. Add service to docker-compose.staging.yml
|
||||
name: CD - Staging Deployment
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
service:
|
||||
description: 'Service to deploy (leave empty for all)'
|
||||
required: false
|
||||
type: choice
|
||||
options:
|
||||
- all
|
||||
- mana-core-auth
|
||||
- chat-backend
|
||||
- chat-web
|
||||
- manacore-web
|
||||
- todo-backend
|
||||
- todo-web
|
||||
- calendar-backend
|
||||
- calendar-web
|
||||
- clock-backend
|
||||
- clock-web
|
||||
- telegram-stats-bot
|
||||
workflow_call:
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
packages: read
|
||||
|
||||
env:
|
||||
NODE_VERSION: '20'
|
||||
PNPM_VERSION: '9.15.0'
|
||||
|
||||
jobs:
|
||||
deploy-staging:
|
||||
name: Deploy to Staging
|
||||
runs-on: ubuntu-latest
|
||||
environment:
|
||||
name: staging
|
||||
url: https://staging.manacore.app
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Setup SSH for deployment
|
||||
uses: webfactory/ssh-agent@v0.9.0
|
||||
with:
|
||||
ssh-private-key: ${{ secrets.STAGING_SSH_KEY }}
|
||||
|
||||
- name: Add staging server to known hosts
|
||||
env:
|
||||
STAGING_HOST: 46.224.108.214
|
||||
run: |
|
||||
mkdir -p ~/.ssh
|
||||
ssh-keyscan -H $STAGING_HOST >> ~/.ssh/known_hosts
|
||||
|
||||
- name: Prepare deployment directory
|
||||
env:
|
||||
STAGING_USER: deploy
|
||||
STAGING_HOST: 46.224.108.214
|
||||
run: |
|
||||
ssh $STAGING_USER@$STAGING_HOST << 'EOF'
|
||||
mkdir -p ~/manacore-staging
|
||||
cd ~/manacore-staging
|
||||
|
||||
# Create required directories
|
||||
mkdir -p logs
|
||||
mkdir -p data/postgres
|
||||
mkdir -p data/redis
|
||||
EOF
|
||||
|
||||
- name: Copy docker-compose file
|
||||
env:
|
||||
STAGING_USER: deploy
|
||||
STAGING_HOST: 46.224.108.214
|
||||
run: |
|
||||
scp docker-compose.staging.yml $STAGING_USER@$STAGING_HOST:~/manacore-staging/docker-compose.yml
|
||||
|
||||
- name: Copy environment file
|
||||
env:
|
||||
STAGING_USER: deploy
|
||||
STAGING_HOST: 46.224.108.214
|
||||
run: |
|
||||
# Create staging env file (mix of hardcoded config and secrets)
|
||||
cat > .env.staging << EOF
|
||||
# Database - Configuration
|
||||
POSTGRES_HOST=postgres
|
||||
POSTGRES_PORT=5432
|
||||
POSTGRES_DB=manacore
|
||||
POSTGRES_USER=postgres
|
||||
POSTGRES_PASSWORD=${{ secrets.STAGING_POSTGRES_PASSWORD }}
|
||||
|
||||
# Redis - Configuration
|
||||
REDIS_HOST=redis
|
||||
REDIS_PORT=6379
|
||||
REDIS_PASSWORD=${{ secrets.STAGING_REDIS_PASSWORD }}
|
||||
|
||||
# Mana Core Auth - Configuration
|
||||
MANA_SERVICE_URL=http://mana-core-auth:3001
|
||||
JWT_SECRET=${{ secrets.STAGING_JWT_SECRET }}
|
||||
JWT_PUBLIC_KEY=${{ secrets.STAGING_JWT_PUBLIC_KEY }}
|
||||
JWT_PRIVATE_KEY=${{ secrets.STAGING_JWT_PRIVATE_KEY }}
|
||||
|
||||
# Supabase
|
||||
SUPABASE_URL=${{ secrets.STAGING_SUPABASE_URL }}
|
||||
SUPABASE_ANON_KEY=${{ secrets.STAGING_SUPABASE_ANON_KEY }}
|
||||
SUPABASE_SERVICE_ROLE_KEY=${{ secrets.STAGING_SUPABASE_SERVICE_ROLE_KEY }}
|
||||
|
||||
# Azure OpenAI
|
||||
AZURE_OPENAI_ENDPOINT=${{ secrets.STAGING_AZURE_OPENAI_ENDPOINT }}
|
||||
AZURE_OPENAI_API_KEY=${{ secrets.STAGING_AZURE_OPENAI_API_KEY }}
|
||||
AZURE_OPENAI_API_VERSION=2024-12-01-preview
|
||||
|
||||
# Environment
|
||||
NODE_ENV=staging
|
||||
EOF
|
||||
|
||||
scp .env.staging $STAGING_USER@$STAGING_HOST:~/manacore-staging/.env
|
||||
rm .env.staging
|
||||
|
||||
- name: Login to GitHub Container Registry on staging server
|
||||
env:
|
||||
STAGING_USER: deploy
|
||||
STAGING_HOST: 46.224.108.214
|
||||
run: |
|
||||
ssh $STAGING_USER@$STAGING_HOST << EOF
|
||||
# Login to ghcr.io with GitHub token
|
||||
echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io -u ${{ github.actor }} --password-stdin
|
||||
EOF
|
||||
|
||||
- name: Pull latest Docker images
|
||||
env:
|
||||
STAGING_USER: deploy
|
||||
STAGING_HOST: 46.224.108.214
|
||||
run: |
|
||||
ssh $STAGING_USER@$STAGING_HOST << 'EOF'
|
||||
cd ~/manacore-staging
|
||||
docker compose pull
|
||||
EOF
|
||||
|
||||
- name: Deploy services
|
||||
env:
|
||||
STAGING_USER: deploy
|
||||
STAGING_HOST: 46.224.108.214
|
||||
run: |
|
||||
SERVICE="${{ github.event.inputs.service || 'all' }}"
|
||||
|
||||
ssh $STAGING_USER@$STAGING_HOST << EOF
|
||||
cd ~/manacore-staging
|
||||
|
||||
# Determine which services to deploy
|
||||
if [ "$SERVICE" == "all" ]; then
|
||||
echo "Deploying all services..."
|
||||
docker compose up -d
|
||||
else
|
||||
echo "Deploying service: $SERVICE"
|
||||
docker compose up -d $SERVICE
|
||||
fi
|
||||
|
||||
# Wait for initial startup
|
||||
echo "Waiting for services to start..."
|
||||
sleep 15
|
||||
|
||||
echo "=== Container Status ==="
|
||||
docker compose ps
|
||||
EOF
|
||||
|
||||
- name: Create databases
|
||||
env:
|
||||
STAGING_USER: deploy
|
||||
STAGING_HOST: 46.224.108.214
|
||||
run: |
|
||||
ssh $STAGING_USER@$STAGING_HOST << 'EOF'
|
||||
cd ~/manacore-staging
|
||||
|
||||
echo "Creating required databases..."
|
||||
|
||||
# Create manacore_auth database (for mana-core-auth service)
|
||||
docker compose exec -T postgres psql -U postgres -c "CREATE DATABASE manacore_auth;" 2>/dev/null || echo "manacore_auth database already exists"
|
||||
|
||||
# Create chat database (for chat-backend service)
|
||||
docker compose exec -T postgres psql -U postgres -c "CREATE DATABASE chat;" 2>/dev/null || echo "chat database already exists"
|
||||
|
||||
# Create todo database (for todo-backend service)
|
||||
docker compose exec -T postgres psql -U postgres -c "CREATE DATABASE todo;" 2>/dev/null || echo "todo database already exists"
|
||||
|
||||
# Create calendar database (for calendar-backend service)
|
||||
docker compose exec -T postgres psql -U postgres -c "CREATE DATABASE calendar;" 2>/dev/null || echo "calendar database already exists"
|
||||
|
||||
# Create clock database (for clock-backend service)
|
||||
docker compose exec -T postgres psql -U postgres -c "CREATE DATABASE clock;" 2>/dev/null || echo "clock database already exists"
|
||||
|
||||
echo "✅ Databases ready"
|
||||
EOF
|
||||
|
||||
- name: Run database migrations
|
||||
env:
|
||||
STAGING_USER: deploy
|
||||
STAGING_HOST: 46.224.108.214
|
||||
run: |
|
||||
ssh $STAGING_USER@$STAGING_HOST << 'EOF'
|
||||
cd ~/manacore-staging
|
||||
|
||||
echo "=== Running Database Migrations ==="
|
||||
echo ""
|
||||
|
||||
# Migration function with retry logic
|
||||
run_migration() {
|
||||
local service=$1
|
||||
local max_attempts=3
|
||||
local timeout=300 # 5 minutes
|
||||
local attempt=1
|
||||
|
||||
while [ $attempt -le $max_attempts ]; do
|
||||
echo "[$service] Migration attempt $attempt/$max_attempts..."
|
||||
|
||||
# Run migration with timeout
|
||||
if timeout $timeout docker compose exec -T $service pnpm run db:migrate 2>&1; then
|
||||
echo "✅ [$service] Migration succeeded"
|
||||
return 0
|
||||
else
|
||||
exit_code=$?
|
||||
if [ $exit_code -eq 124 ]; then
|
||||
echo "⚠️ [$service] Migration timeout after ${timeout}s"
|
||||
else
|
||||
echo "⚠️ [$service] Migration failed with exit code $exit_code"
|
||||
fi
|
||||
|
||||
attempt=$((attempt + 1))
|
||||
if [ $attempt -le $max_attempts ]; then
|
||||
wait_time=$((10 * attempt)) # Backoff: 10s, 20s, 30s
|
||||
echo " Waiting ${wait_time}s before retry..."
|
||||
sleep $wait_time
|
||||
fi
|
||||
fi
|
||||
done
|
||||
|
||||
echo "❌ [$service] Migration failed after $max_attempts attempts"
|
||||
return 1
|
||||
}
|
||||
|
||||
# Run migrations for services that have db:migrate script
|
||||
# mana-core-auth - central auth service
|
||||
if docker compose exec -T mana-core-auth test -f src/db/migrate.ts 2>/dev/null || \
|
||||
docker compose exec -T mana-core-auth pnpm run db:migrate --help 2>/dev/null; then
|
||||
run_migration mana-core-auth || {
|
||||
echo "❌ mana-core-auth migration failed - aborting deployment"
|
||||
exit 1
|
||||
}
|
||||
else
|
||||
echo "⏭️ [mana-core-auth] No db:migrate script, using db:push..."
|
||||
docker compose exec -T mana-core-auth npx drizzle-kit push --force || echo "Auth schema push completed"
|
||||
fi
|
||||
|
||||
echo ""
|
||||
echo "✅ All migrations completed"
|
||||
EOF
|
||||
|
||||
- name: Run health checks
|
||||
env:
|
||||
STAGING_USER: deploy
|
||||
STAGING_HOST: 46.224.108.214
|
||||
run: |
|
||||
ssh $STAGING_USER@$STAGING_HOST << 'EOF'
|
||||
cd ~/manacore-staging
|
||||
|
||||
echo "=== Health Checks with Polling ==="
|
||||
echo ""
|
||||
|
||||
# Health check function with retry polling
|
||||
check_health() {
|
||||
local service=$1
|
||||
local url=$2
|
||||
local max_attempts=24 # 24 * 5s = 2 minutes max wait
|
||||
local attempt=1
|
||||
|
||||
echo "Checking $service..."
|
||||
|
||||
while [ $attempt -le $max_attempts ]; do
|
||||
# Check if container is running
|
||||
if ! docker compose ps $service 2>/dev/null | grep -q "Up"; then
|
||||
if [ $attempt -eq 1 ]; then
|
||||
echo " ⏳ Waiting for container to start..."
|
||||
fi
|
||||
sleep 5
|
||||
attempt=$((attempt + 1))
|
||||
continue
|
||||
fi
|
||||
|
||||
# Check health endpoint
|
||||
if docker compose exec -T $service wget -q -O - $url > /dev/null 2>&1; then
|
||||
echo " ✅ $service is healthy (attempt $attempt)"
|
||||
return 0
|
||||
fi
|
||||
|
||||
if [ $attempt -eq 1 ]; then
|
||||
echo " ⏳ Waiting for $service to become healthy..."
|
||||
fi
|
||||
|
||||
sleep 5
|
||||
attempt=$((attempt + 1))
|
||||
done
|
||||
|
||||
echo " ❌ $service health check failed after $max_attempts attempts"
|
||||
echo " === Recent Logs ==="
|
||||
docker compose logs --tail=50 $service
|
||||
return 1
|
||||
}
|
||||
|
||||
echo "=== Container Status ==="
|
||||
docker compose ps
|
||||
echo ""
|
||||
|
||||
# Check all services with polling
|
||||
check_health mana-core-auth http://localhost:3001/api/v1/health || exit 1
|
||||
check_health chat-backend http://localhost:3002/api/v1/health || exit 1
|
||||
check_health chat-web http://localhost:3000/health || exit 1
|
||||
check_health manacore-web http://localhost:5173/health || exit 1
|
||||
check_health todo-backend http://localhost:3018/api/v1/health || exit 1
|
||||
check_health todo-web http://localhost:5188/health || exit 1
|
||||
check_health calendar-backend http://localhost:3016/api/v1/health || exit 1
|
||||
check_health calendar-web http://localhost:5186/health || exit 1
|
||||
check_health clock-backend http://localhost:3017/api/v1/health || exit 1
|
||||
check_health clock-web http://localhost:5187/health || exit 1
|
||||
|
||||
echo ""
|
||||
echo "✅ All health checks passed!"
|
||||
EOF
|
||||
|
||||
- name: Deployment summary
|
||||
run: |
|
||||
echo "## Staging Deployment Summary" >> $GITHUB_STEP_SUMMARY
|
||||
echo "" >> $GITHUB_STEP_SUMMARY
|
||||
echo "- **Environment**: Staging" >> $GITHUB_STEP_SUMMARY
|
||||
echo "- **Deployed by**: ${{ github.actor }}" >> $GITHUB_STEP_SUMMARY
|
||||
echo "- **Commit**: ${{ github.sha }}" >> $GITHUB_STEP_SUMMARY
|
||||
echo "- **Timestamp**: $(date -u +'%Y-%m-%d %H:%M:%S UTC')" >> $GITHUB_STEP_SUMMARY
|
||||
echo "" >> $GITHUB_STEP_SUMMARY
|
||||
echo "### Services Deployed" >> $GITHUB_STEP_SUMMARY
|
||||
echo "Service: ${{ github.event.inputs.service || 'all' }}" >> $GITHUB_STEP_SUMMARY
|
||||
echo "" >> $GITHUB_STEP_SUMMARY
|
||||
echo "### Health Checks" >> $GITHUB_STEP_SUMMARY
|
||||
echo "All health checks passed ✅" >> $GITHUB_STEP_SUMMARY
|
||||
|
||||
notify-deployment:
|
||||
name: Notify Deployment
|
||||
runs-on: ubuntu-latest
|
||||
needs: deploy-staging
|
||||
if: always()
|
||||
steps:
|
||||
- name: Deployment notification
|
||||
run: |
|
||||
STATUS="${{ needs.deploy-staging.result }}"
|
||||
|
||||
if [ "$STATUS" == "success" ]; then
|
||||
echo "✅ Staging deployment completed successfully"
|
||||
else
|
||||
echo "❌ Staging deployment failed"
|
||||
exit 1
|
||||
fi
|
||||
264
.github/workflows/cd-staging.yml.bak
vendored
264
.github/workflows/cd-staging.yml.bak
vendored
|
|
@ -1,264 +0,0 @@
|
|||
# ARCHIVED: Full staging workflow with all services
|
||||
# Active simplified workflow: .github/workflows/cd-staging.yml
|
||||
#
|
||||
# Services included: mana-core-auth, chat-backend, manadeck-backend
|
||||
#
|
||||
# To restore: cp .github/workflows/cd-staging.full.yml .github/workflows/cd-staging.yml
|
||||
|
||||
name: CD - Staging Deployment
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
service:
|
||||
description: 'Service to deploy (leave empty for all)'
|
||||
required: false
|
||||
type: choice
|
||||
options:
|
||||
- all
|
||||
- mana-core-auth
|
||||
- chat-backend
|
||||
- manadeck-backend
|
||||
workflow_call:
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
packages: read
|
||||
|
||||
env:
|
||||
NODE_VERSION: '20'
|
||||
PNPM_VERSION: '9.15.0'
|
||||
|
||||
jobs:
|
||||
deploy-staging:
|
||||
name: Deploy to Staging
|
||||
runs-on: ubuntu-latest
|
||||
environment:
|
||||
name: staging
|
||||
url: https://staging.manacore.app
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Setup SSH for deployment
|
||||
uses: webfactory/ssh-agent@v0.9.0
|
||||
with:
|
||||
ssh-private-key: ${{ secrets.STAGING_SSH_KEY }}
|
||||
|
||||
- name: Add staging server to known hosts
|
||||
env:
|
||||
STAGING_HOST: 46.224.108.214
|
||||
run: |
|
||||
mkdir -p ~/.ssh
|
||||
ssh-keyscan -H $STAGING_HOST >> ~/.ssh/known_hosts
|
||||
|
||||
- name: Prepare deployment directory
|
||||
env:
|
||||
STAGING_USER: deploy
|
||||
STAGING_HOST: 46.224.108.214
|
||||
run: |
|
||||
ssh $STAGING_USER@$STAGING_HOST << 'EOF'
|
||||
mkdir -p ~/manacore-staging
|
||||
cd ~/manacore-staging
|
||||
|
||||
# Create required directories
|
||||
mkdir -p logs
|
||||
mkdir -p data/postgres
|
||||
mkdir -p data/redis
|
||||
EOF
|
||||
|
||||
- name: Copy docker-compose file
|
||||
env:
|
||||
STAGING_USER: deploy
|
||||
STAGING_HOST: 46.224.108.214
|
||||
run: |
|
||||
scp docker-compose.staging.yml $STAGING_USER@$STAGING_HOST:~/manacore-staging/docker-compose.yml
|
||||
|
||||
- name: Copy environment file
|
||||
env:
|
||||
STAGING_USER: deploy
|
||||
STAGING_HOST: 46.224.108.214
|
||||
run: |
|
||||
# Create staging env file (mix of hardcoded config and secrets)
|
||||
cat > .env.staging << EOF
|
||||
# Database - Configuration
|
||||
POSTGRES_HOST=postgres
|
||||
POSTGRES_PORT=5432
|
||||
POSTGRES_DB=manacore
|
||||
POSTGRES_USER=postgres
|
||||
POSTGRES_PASSWORD=${{ secrets.STAGING_POSTGRES_PASSWORD }}
|
||||
|
||||
# Redis - Configuration
|
||||
REDIS_HOST=redis
|
||||
REDIS_PORT=6379
|
||||
REDIS_PASSWORD=${{ secrets.STAGING_REDIS_PASSWORD }}
|
||||
|
||||
# Mana Core Auth - Configuration
|
||||
MANA_SERVICE_URL=http://mana-core-auth:3001
|
||||
JWT_SECRET=${{ secrets.STAGING_JWT_SECRET }}
|
||||
JWT_PUBLIC_KEY=${{ secrets.STAGING_JWT_PUBLIC_KEY }}
|
||||
JWT_PRIVATE_KEY=${{ secrets.STAGING_JWT_PRIVATE_KEY }}
|
||||
|
||||
# Supabase
|
||||
SUPABASE_URL=${{ secrets.STAGING_SUPABASE_URL }}
|
||||
SUPABASE_ANON_KEY=${{ secrets.STAGING_SUPABASE_ANON_KEY }}
|
||||
SUPABASE_SERVICE_ROLE_KEY=${{ secrets.STAGING_SUPABASE_SERVICE_ROLE_KEY }}
|
||||
|
||||
# Azure OpenAI
|
||||
AZURE_OPENAI_ENDPOINT=${{ secrets.STAGING_AZURE_OPENAI_ENDPOINT }}
|
||||
AZURE_OPENAI_API_KEY=${{ secrets.STAGING_AZURE_OPENAI_API_KEY }}
|
||||
AZURE_OPENAI_API_VERSION=2024-12-01-preview
|
||||
|
||||
# Environment
|
||||
NODE_ENV=staging
|
||||
EOF
|
||||
|
||||
scp .env.staging $STAGING_USER@$STAGING_HOST:~/manacore-staging/.env
|
||||
rm .env.staging
|
||||
|
||||
- name: Login to GitHub Container Registry on staging server
|
||||
env:
|
||||
STAGING_USER: deploy
|
||||
STAGING_HOST: 46.224.108.214
|
||||
run: |
|
||||
ssh $STAGING_USER@$STAGING_HOST << EOF
|
||||
# Login to ghcr.io with GitHub token
|
||||
echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io -u ${{ github.actor }} --password-stdin
|
||||
EOF
|
||||
|
||||
- name: Pull latest Docker images
|
||||
env:
|
||||
STAGING_USER: deploy
|
||||
STAGING_HOST: 46.224.108.214
|
||||
run: |
|
||||
ssh $STAGING_USER@$STAGING_HOST << 'EOF'
|
||||
cd ~/manacore-staging
|
||||
docker compose pull
|
||||
EOF
|
||||
|
||||
- name: Deploy services
|
||||
env:
|
||||
STAGING_USER: deploy
|
||||
STAGING_HOST: 46.224.108.214
|
||||
run: |
|
||||
SERVICE="${{ github.event.inputs.service || 'all' }}"
|
||||
|
||||
ssh $STAGING_USER@$STAGING_HOST << EOF
|
||||
cd ~/manacore-staging
|
||||
|
||||
# Determine which services to deploy
|
||||
if [ "$SERVICE" == "all" ]; then
|
||||
echo "Deploying all services..."
|
||||
docker compose up -d
|
||||
else
|
||||
echo "Deploying service: $SERVICE"
|
||||
docker compose up -d $SERVICE
|
||||
fi
|
||||
|
||||
# Wait for initial startup
|
||||
echo "Waiting for services to start..."
|
||||
sleep 15
|
||||
|
||||
echo "=== Container Status ==="
|
||||
docker compose ps
|
||||
EOF
|
||||
|
||||
- name: Run health checks
|
||||
env:
|
||||
STAGING_USER: deploy
|
||||
STAGING_HOST: 46.224.108.214
|
||||
run: |
|
||||
ssh $STAGING_USER@$STAGING_HOST << 'EOF'
|
||||
cd ~/manacore-staging
|
||||
|
||||
# Wait for services to fully start
|
||||
echo "Waiting 60s for services to fully initialize..."
|
||||
sleep 60
|
||||
|
||||
echo "=== Container Status ==="
|
||||
docker compose ps
|
||||
|
||||
echo ""
|
||||
echo "=== Health Checks ==="
|
||||
|
||||
# Check mana-core-auth
|
||||
echo "Checking mana-core-auth..."
|
||||
if docker compose exec -T mana-core-auth wget -q -O - http://localhost:3001/api/v1/health > /dev/null 2>&1; then
|
||||
echo "✅ mana-core-auth is healthy"
|
||||
else
|
||||
echo "❌ mana-core-auth health check failed"
|
||||
echo "=== Logs ==="
|
||||
docker compose logs --tail=50 mana-core-auth
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Check chat-backend
|
||||
echo "Checking chat-backend..."
|
||||
if docker compose exec -T chat-backend wget -q -O - http://localhost:3002/api/health > /dev/null 2>&1; then
|
||||
echo "✅ chat-backend is healthy"
|
||||
else
|
||||
echo "❌ chat-backend health check failed"
|
||||
echo "=== Logs ==="
|
||||
docker compose logs --tail=50 chat-backend
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Check manadeck-backend
|
||||
echo "Checking manadeck-backend..."
|
||||
if docker compose exec -T manadeck-backend wget -q -O - http://localhost:3003/api/health > /dev/null 2>&1; then
|
||||
echo "✅ manadeck-backend is healthy"
|
||||
else
|
||||
echo "❌ manadeck-backend health check failed"
|
||||
echo "=== Logs ==="
|
||||
docker compose logs --tail=50 manadeck-backend
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo ""
|
||||
echo "✅ All health checks passed!"
|
||||
EOF
|
||||
|
||||
- name: Run database migrations
|
||||
env:
|
||||
STAGING_USER: deploy
|
||||
STAGING_HOST: 46.224.108.214
|
||||
run: |
|
||||
# Run migrations for services that need them
|
||||
ssh $STAGING_USER@$STAGING_HOST << 'EOF'
|
||||
cd ~/manacore-staging
|
||||
|
||||
# Mana Core Auth migrations
|
||||
docker compose exec -T mana-core-auth pnpm run db:migrate || echo "Auth migrations skipped"
|
||||
EOF
|
||||
|
||||
- name: Deployment summary
|
||||
run: |
|
||||
echo "## Staging Deployment Summary" >> $GITHUB_STEP_SUMMARY
|
||||
echo "" >> $GITHUB_STEP_SUMMARY
|
||||
echo "- **Environment**: Staging" >> $GITHUB_STEP_SUMMARY
|
||||
echo "- **Deployed by**: ${{ github.actor }}" >> $GITHUB_STEP_SUMMARY
|
||||
echo "- **Commit**: ${{ github.sha }}" >> $GITHUB_STEP_SUMMARY
|
||||
echo "- **Timestamp**: $(date -u +'%Y-%m-%d %H:%M:%S UTC')" >> $GITHUB_STEP_SUMMARY
|
||||
echo "" >> $GITHUB_STEP_SUMMARY
|
||||
echo "### Services Deployed" >> $GITHUB_STEP_SUMMARY
|
||||
echo "Service: ${{ github.event.inputs.service || 'all' }}" >> $GITHUB_STEP_SUMMARY
|
||||
echo "" >> $GITHUB_STEP_SUMMARY
|
||||
echo "### Health Checks" >> $GITHUB_STEP_SUMMARY
|
||||
echo "All health checks passed ✅" >> $GITHUB_STEP_SUMMARY
|
||||
|
||||
notify-deployment:
|
||||
name: Notify Deployment
|
||||
runs-on: ubuntu-latest
|
||||
needs: deploy-staging
|
||||
if: always()
|
||||
steps:
|
||||
- name: Deployment notification
|
||||
run: |
|
||||
STATUS="${{ needs.deploy-staging.result }}"
|
||||
|
||||
if [ "$STATUS" == "success" ]; then
|
||||
echo "✅ Staging deployment completed successfully"
|
||||
else
|
||||
echo "❌ Staging deployment failed"
|
||||
exit 1
|
||||
fi
|
||||
168
.github/workflows/ci-main.yml.bak
vendored
168
.github/workflows/ci-main.yml.bak
vendored
|
|
@ -1,168 +0,0 @@
|
|||
name: CI - Main Branch
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
workflow_dispatch:
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
env:
|
||||
NODE_VERSION: '20'
|
||||
PNPM_VERSION: '9.15.0'
|
||||
TURBO_TOKEN: ${{ secrets.TURBO_TOKEN }}
|
||||
TURBO_TEAM: ${{ secrets.TURBO_TEAM }}
|
||||
|
||||
jobs:
|
||||
# Full validation on main branch
|
||||
validate:
|
||||
name: Validate Main Branch
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Setup pnpm
|
||||
uses: pnpm/action-setup@v4
|
||||
with:
|
||||
version: ${{ env.PNPM_VERSION }}
|
||||
|
||||
- name: Setup Node.js
|
||||
uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: ${{ env.NODE_VERSION }}
|
||||
cache: 'pnpm'
|
||||
|
||||
- name: Install dependencies
|
||||
run: pnpm install --frozen-lockfile
|
||||
|
||||
- name: Build shared packages
|
||||
run: pnpm run build:packages
|
||||
|
||||
- name: Run format check
|
||||
run: pnpm run format:check
|
||||
|
||||
- name: Run lint
|
||||
run: pnpm run lint
|
||||
continue-on-error: true
|
||||
|
||||
- name: Run type check
|
||||
run: pnpm run type-check
|
||||
|
||||
- name: Build all projects
|
||||
run: pnpm run build
|
||||
|
||||
- name: Run tests
|
||||
run: pnpm run test || echo "Some tests failed"
|
||||
continue-on-error: true
|
||||
|
||||
- name: Generate build summary
|
||||
run: |
|
||||
echo "## Main Branch Build Summary" >> $GITHUB_STEP_SUMMARY
|
||||
echo "" >> $GITHUB_STEP_SUMMARY
|
||||
echo "- **Commit**: ${{ github.sha }}" >> $GITHUB_STEP_SUMMARY
|
||||
echo "- **Author**: ${{ github.actor }}" >> $GITHUB_STEP_SUMMARY
|
||||
echo "- **Timestamp**: $(date -u +'%Y-%m-%d %H:%M:%S UTC')" >> $GITHUB_STEP_SUMMARY
|
||||
echo "" >> $GITHUB_STEP_SUMMARY
|
||||
echo "### Build Status" >> $GITHUB_STEP_SUMMARY
|
||||
echo "All projects built successfully" >> $GITHUB_STEP_SUMMARY
|
||||
|
||||
# Build and push Docker images for backend services
|
||||
build-docker-images:
|
||||
name: Build Docker Images
|
||||
runs-on: ubuntu-latest
|
||||
needs: validate
|
||||
strategy:
|
||||
matrix:
|
||||
service:
|
||||
- { name: 'maerchenzauber-backend', path: 'apps/maerchenzauber/apps/backend', port: '3002' }
|
||||
- { name: 'chat-backend', path: 'apps/chat/apps/backend', port: '3002' }
|
||||
- { name: 'manadeck-backend', path: 'apps/manadeck/apps/backend', port: '3003' }
|
||||
- { name: 'nutriphi-backend', path: 'apps/nutriphi/apps/backend', port: '3004' }
|
||||
- { name: 'news-api', path: 'apps/news/apps/api', port: '3005' }
|
||||
- { name: 'mana-core-auth', path: 'services/mana-core-auth', port: '3001' }
|
||||
fail-fast: false
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
||||
- name: Check if Dockerfile exists
|
||||
id: check-dockerfile
|
||||
run: |
|
||||
if [ -f "${{ matrix.service.path }}/Dockerfile" ]; then
|
||||
echo "exists=true" >> $GITHUB_OUTPUT
|
||||
else
|
||||
echo "exists=false" >> $GITHUB_OUTPUT
|
||||
echo "Warning: No Dockerfile found for ${{ matrix.service.name }}"
|
||||
fi
|
||||
|
||||
- name: Login to GitHub Container Registry
|
||||
if: steps.check-dockerfile.outputs.exists == 'true'
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
registry: ghcr.io
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Extract metadata
|
||||
if: steps.check-dockerfile.outputs.exists == 'true'
|
||||
id: meta
|
||||
uses: docker/metadata-action@v5
|
||||
with:
|
||||
images: ghcr.io/${{ github.repository_owner }}/${{ matrix.service.name }}
|
||||
tags: |
|
||||
type=sha,prefix={{branch}}-
|
||||
type=ref,event=branch
|
||||
type=raw,value=latest,enable=${{ github.ref == 'refs/heads/main' }}
|
||||
|
||||
- name: Build and push
|
||||
if: steps.check-dockerfile.outputs.exists == 'true'
|
||||
uses: docker/build-push-action@v5
|
||||
with:
|
||||
context: .
|
||||
file: ${{ matrix.service.path }}/Dockerfile
|
||||
push: true
|
||||
tags: ${{ steps.meta.outputs.tags }}
|
||||
labels: ${{ steps.meta.outputs.labels }}
|
||||
cache-from: type=gha
|
||||
cache-to: type=gha,mode=max
|
||||
build-args: |
|
||||
NODE_ENV=production
|
||||
PORT=${{ matrix.service.port }}
|
||||
|
||||
- name: Image digest
|
||||
if: steps.check-dockerfile.outputs.exists == 'true'
|
||||
run: echo "Image digest - ${{ steps.meta.outputs.digest }}"
|
||||
|
||||
# Trigger staging deployment
|
||||
trigger-staging-deploy:
|
||||
name: Trigger Staging Deployment
|
||||
runs-on: ubuntu-latest
|
||||
needs: build-docker-images
|
||||
if: github.ref == 'refs/heads/main'
|
||||
steps:
|
||||
- name: Trigger staging deployment workflow
|
||||
uses: actions/github-script@v7
|
||||
with:
|
||||
script: |
|
||||
await github.rest.actions.createWorkflowDispatch({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
workflow_id: 'cd-staging.yml',
|
||||
ref: 'main'
|
||||
});
|
||||
|
||||
- name: Deployment notification
|
||||
run: |
|
||||
echo "## Staging Deployment Triggered" >> $GITHUB_STEP_SUMMARY
|
||||
echo "" >> $GITHUB_STEP_SUMMARY
|
||||
echo "Docker images have been built and pushed successfully." >> $GITHUB_STEP_SUMMARY
|
||||
echo "Staging deployment workflow has been triggered." >> $GITHUB_STEP_SUMMARY
|
||||
314
.github/workflows/ci-pull-request.yml.bak
vendored
314
.github/workflows/ci-pull-request.yml.bak
vendored
|
|
@ -1,314 +0,0 @@
|
|||
name: CI - Pull Request
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches:
|
||||
- main
|
||||
- develop
|
||||
types: [opened, synchronize, reopened]
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
env:
|
||||
NODE_VERSION: '20'
|
||||
PNPM_VERSION: '9.15.0'
|
||||
TURBO_TOKEN: ${{ secrets.TURBO_TOKEN }}
|
||||
TURBO_TEAM: ${{ secrets.TURBO_TEAM }}
|
||||
|
||||
jobs:
|
||||
# Detect which projects have changed
|
||||
detect-changes:
|
||||
name: Detect Changed Projects
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
projects: ${{ steps.filter.outputs.changes }}
|
||||
has-changes: ${{ steps.filter.outputs.changes != '[]' }}
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Detect changed projects
|
||||
uses: dorny/paths-filter@v3
|
||||
id: filter
|
||||
with:
|
||||
filters: |
|
||||
chat:
|
||||
- 'apps/chat/**'
|
||||
- 'packages/**'
|
||||
manacore:
|
||||
- 'apps/manacore/**'
|
||||
- 'packages/**'
|
||||
packages:
|
||||
- 'packages/**'
|
||||
|
||||
# Lint and format check
|
||||
lint-and-format:
|
||||
name: Lint & Format Check
|
||||
runs-on: ubuntu-latest
|
||||
needs: detect-changes
|
||||
if: needs.detect-changes.outputs.has-changes == 'true'
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Setup pnpm
|
||||
uses: pnpm/action-setup@v4
|
||||
with:
|
||||
version: ${{ env.PNPM_VERSION }}
|
||||
|
||||
- name: Setup Node.js
|
||||
uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: ${{ env.NODE_VERSION }}
|
||||
cache: 'pnpm'
|
||||
|
||||
- name: Install dependencies
|
||||
run: pnpm install --frozen-lockfile
|
||||
|
||||
- name: Run format check
|
||||
run: pnpm run format:check
|
||||
continue-on-error: true
|
||||
|
||||
- name: Run lint
|
||||
run: pnpm run lint --filter='./apps/chat/**' --filter='./apps/manacore/**'
|
||||
continue-on-error: true
|
||||
|
||||
# Type checking
|
||||
type-check:
|
||||
name: Type Check
|
||||
runs-on: ubuntu-latest
|
||||
needs: detect-changes
|
||||
if: needs.detect-changes.outputs.has-changes == 'true'
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Setup pnpm
|
||||
uses: pnpm/action-setup@v4
|
||||
with:
|
||||
version: ${{ env.PNPM_VERSION }}
|
||||
|
||||
- name: Setup Node.js
|
||||
uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: ${{ env.NODE_VERSION }}
|
||||
cache: 'pnpm'
|
||||
|
||||
- name: Install dependencies
|
||||
run: pnpm install --frozen-lockfile
|
||||
|
||||
- name: Build shared packages
|
||||
run: pnpm run build:packages
|
||||
|
||||
- name: Run type check
|
||||
run: pnpm run type-check --filter='./apps/chat/**' --filter='./apps/manacore/**'
|
||||
continue-on-error: true
|
||||
|
||||
# Build all affected projects
|
||||
build:
|
||||
name: Build Projects
|
||||
runs-on: ubuntu-latest
|
||||
needs: detect-changes
|
||||
if: needs.detect-changes.outputs.has-changes == 'true'
|
||||
strategy:
|
||||
matrix:
|
||||
project: ${{ fromJSON(needs.detect-changes.outputs.projects) }}
|
||||
fail-fast: false
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Setup pnpm
|
||||
uses: pnpm/action-setup@v4
|
||||
with:
|
||||
version: ${{ env.PNPM_VERSION }}
|
||||
|
||||
- name: Setup Node.js
|
||||
uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: ${{ env.NODE_VERSION }}
|
||||
cache: 'pnpm'
|
||||
|
||||
- name: Install dependencies
|
||||
run: pnpm install --frozen-lockfile
|
||||
|
||||
- name: Build shared packages
|
||||
run: pnpm run build:packages
|
||||
|
||||
- name: Build project - ${{ matrix.project }}
|
||||
run: |
|
||||
if [ "${{ matrix.project }}" == "packages" ]; then
|
||||
pnpm run build --filter=@manacore/*
|
||||
else
|
||||
pnpm run build --filter='./apps/${{ matrix.project }}/**'
|
||||
fi
|
||||
continue-on-error: true
|
||||
|
||||
- name: Upload build artifacts
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: build-${{ matrix.project }}
|
||||
path: |
|
||||
apps/${{ matrix.project }}/**/dist
|
||||
apps/${{ matrix.project }}/**/.next
|
||||
apps/${{ matrix.project }}/**/.svelte-kit
|
||||
apps/${{ matrix.project }}/**/.astro
|
||||
services/**/dist
|
||||
retention-days: 7
|
||||
if-no-files-found: ignore
|
||||
|
||||
# Run tests
|
||||
test:
|
||||
name: Run Tests
|
||||
runs-on: ubuntu-latest
|
||||
needs: detect-changes
|
||||
if: needs.detect-changes.outputs.has-changes == 'true'
|
||||
strategy:
|
||||
matrix:
|
||||
project: ${{ fromJSON(needs.detect-changes.outputs.projects) }}
|
||||
fail-fast: false
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Setup pnpm
|
||||
uses: pnpm/action-setup@v4
|
||||
with:
|
||||
version: ${{ env.PNPM_VERSION }}
|
||||
|
||||
- name: Setup Node.js
|
||||
uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: ${{ env.NODE_VERSION }}
|
||||
cache: 'pnpm'
|
||||
|
||||
- name: Install dependencies
|
||||
run: pnpm install --frozen-lockfile
|
||||
|
||||
- name: Build shared packages
|
||||
run: pnpm run build:packages
|
||||
|
||||
- name: Run tests - ${{ matrix.project }}
|
||||
run: |
|
||||
if [ "${{ matrix.project }}" == "packages" ]; then
|
||||
pnpm run test --filter=@manacore/* || echo "No tests found for packages"
|
||||
else
|
||||
pnpm run test --filter='./apps/${{ matrix.project }}/**' || echo "No tests found for ${{ matrix.project }}"
|
||||
fi
|
||||
continue-on-error: true
|
||||
|
||||
- name: Upload test coverage
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: coverage-${{ matrix.project }}
|
||||
path: |
|
||||
apps/${{ matrix.project }}/**/coverage
|
||||
services/**/coverage
|
||||
retention-days: 7
|
||||
if-no-files-found: ignore
|
||||
|
||||
# Docker build validation for backend services
|
||||
docker-build-check:
|
||||
name: Docker Build Check
|
||||
runs-on: ubuntu-latest
|
||||
needs: detect-changes
|
||||
if: contains(needs.detect-changes.outputs.projects, 'chat')
|
||||
strategy:
|
||||
matrix:
|
||||
service:
|
||||
- { name: 'chat-backend', path: 'apps/chat/apps/backend' }
|
||||
fail-fast: false
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
||||
- name: Check if Dockerfile exists
|
||||
id: check-dockerfile
|
||||
run: |
|
||||
if [ -f "${{ matrix.service.path }}/Dockerfile" ]; then
|
||||
echo "exists=true" >> $GITHUB_OUTPUT
|
||||
else
|
||||
echo "exists=false" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
|
||||
- name: Build Docker image
|
||||
if: steps.check-dockerfile.outputs.exists == 'true'
|
||||
uses: docker/build-push-action@v5
|
||||
with:
|
||||
context: .
|
||||
file: ${{ matrix.service.path }}/Dockerfile
|
||||
push: false
|
||||
tags: ${{ matrix.service.name }}:pr-${{ github.event.pull_request.number }}
|
||||
cache-from: type=gha
|
||||
cache-to: type=gha,mode=max
|
||||
build-args: |
|
||||
NODE_ENV=production
|
||||
|
||||
# Security scanning
|
||||
security-scan:
|
||||
name: Security Scan
|
||||
runs-on: ubuntu-latest
|
||||
needs: detect-changes
|
||||
if: needs.detect-changes.outputs.has-changes == 'true'
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Setup pnpm
|
||||
uses: pnpm/action-setup@v4
|
||||
with:
|
||||
version: ${{ env.PNPM_VERSION }}
|
||||
|
||||
- name: Setup Node.js
|
||||
uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: ${{ env.NODE_VERSION }}
|
||||
cache: 'pnpm'
|
||||
|
||||
- name: Install dependencies
|
||||
run: pnpm install --frozen-lockfile
|
||||
|
||||
- name: Run security audit
|
||||
run: pnpm audit --audit-level=high
|
||||
continue-on-error: true
|
||||
|
||||
- name: Check for outdated dependencies
|
||||
run: pnpm outdated
|
||||
continue-on-error: true
|
||||
|
||||
# PR status check (required for merge)
|
||||
pr-checks-complete:
|
||||
name: All PR Checks Complete
|
||||
runs-on: ubuntu-latest
|
||||
needs: [lint-and-format, type-check, build, test, docker-build-check, security-scan]
|
||||
if: always()
|
||||
steps:
|
||||
- name: Check all jobs status
|
||||
run: |
|
||||
if [ "${{ needs.lint-and-format.result }}" == "failure" ] || \
|
||||
[ "${{ needs.type-check.result }}" == "failure" ] || \
|
||||
[ "${{ needs.build.result }}" == "failure" ]; then
|
||||
echo "One or more required checks failed"
|
||||
exit 1
|
||||
fi
|
||||
echo "All required checks passed"
|
||||
|
||||
- name: PR summary
|
||||
run: |
|
||||
echo "## PR Checks Summary" >> $GITHUB_STEP_SUMMARY
|
||||
echo "" >> $GITHUB_STEP_SUMMARY
|
||||
echo "| Check | Status |" >> $GITHUB_STEP_SUMMARY
|
||||
echo "|-------|--------|" >> $GITHUB_STEP_SUMMARY
|
||||
echo "| Lint & Format | ${{ needs.lint-and-format.result }} |" >> $GITHUB_STEP_SUMMARY
|
||||
echo "| Type Check | ${{ needs.type-check.result }} |" >> $GITHUB_STEP_SUMMARY
|
||||
echo "| Build | ${{ needs.build.result }} |" >> $GITHUB_STEP_SUMMARY
|
||||
echo "| Tests | ${{ needs.test.result }} |" >> $GITHUB_STEP_SUMMARY
|
||||
echo "| Docker Build | ${{ needs.docker-build-check.result }} |" >> $GITHUB_STEP_SUMMARY
|
||||
echo "| Security Scan | ${{ needs.security-scan.result }} |" >> $GITHUB_STEP_SUMMARY
|
||||
249
.github/workflows/dependency-update.yml.bak
vendored
249
.github/workflows/dependency-update.yml.bak
vendored
|
|
@ -1,249 +0,0 @@
|
|||
name: Dependency Updates
|
||||
|
||||
on:
|
||||
schedule:
|
||||
# Run every Monday at 06:00 UTC
|
||||
- cron: '0 6 * * 1'
|
||||
workflow_dispatch:
|
||||
|
||||
env:
|
||||
NODE_VERSION: '20'
|
||||
PNPM_VERSION: '9.15.0'
|
||||
|
||||
jobs:
|
||||
# Check for outdated dependencies
|
||||
check-outdated:
|
||||
name: Check Outdated Dependencies
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Setup pnpm
|
||||
uses: pnpm/action-setup@v4
|
||||
with:
|
||||
version: ${{ env.PNPM_VERSION }}
|
||||
|
||||
- name: Setup Node.js
|
||||
uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: ${{ env.NODE_VERSION }}
|
||||
cache: 'pnpm'
|
||||
|
||||
- name: Install dependencies
|
||||
run: pnpm install --frozen-lockfile
|
||||
|
||||
- name: Check for outdated dependencies
|
||||
run: pnpm outdated --format json > outdated.json || true
|
||||
|
||||
- name: Generate outdated report
|
||||
run: |
|
||||
echo "## Outdated Dependencies Report" >> $GITHUB_STEP_SUMMARY
|
||||
echo "" >> $GITHUB_STEP_SUMMARY
|
||||
echo "Generated on: $(date -u +'%Y-%m-%d %H:%M:%S UTC')" >> $GITHUB_STEP_SUMMARY
|
||||
echo "" >> $GITHUB_STEP_SUMMARY
|
||||
|
||||
if [ -f outdated.json ] && [ -s outdated.json ]; then
|
||||
echo "### Packages to Update" >> $GITHUB_STEP_SUMMARY
|
||||
echo "" >> $GITHUB_STEP_SUMMARY
|
||||
cat outdated.json | jq -r 'to_entries[] | "- **\(.key)**: \(.value.current) → \(.value.latest)"' >> $GITHUB_STEP_SUMMARY || echo "No outdated packages found" >> $GITHUB_STEP_SUMMARY
|
||||
else
|
||||
echo "✅ All dependencies are up to date!" >> $GITHUB_STEP_SUMMARY
|
||||
fi
|
||||
|
||||
- name: Upload outdated report
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: outdated-dependencies
|
||||
path: outdated.json
|
||||
retention-days: 30
|
||||
if: always()
|
||||
|
||||
# Security audit
|
||||
security-audit:
|
||||
name: Security Audit
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Setup pnpm
|
||||
uses: pnpm/action-setup@v4
|
||||
with:
|
||||
version: ${{ env.PNPM_VERSION }}
|
||||
|
||||
- name: Setup Node.js
|
||||
uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: ${{ env.NODE_VERSION }}
|
||||
cache: 'pnpm'
|
||||
|
||||
- name: Install dependencies
|
||||
run: pnpm install --frozen-lockfile
|
||||
|
||||
- name: Run security audit
|
||||
run: |
|
||||
pnpm audit --json > audit-report.json || true
|
||||
pnpm audit --audit-level=moderate || echo "Security vulnerabilities found"
|
||||
|
||||
- name: Generate security report
|
||||
run: |
|
||||
echo "## Security Audit Report" >> $GITHUB_STEP_SUMMARY
|
||||
echo "" >> $GITHUB_STEP_SUMMARY
|
||||
echo "Generated on: $(date -u +'%Y-%m-%d %H:%M:%S UTC')" >> $GITHUB_STEP_SUMMARY
|
||||
echo "" >> $GITHUB_STEP_SUMMARY
|
||||
|
||||
if [ -f audit-report.json ]; then
|
||||
# Parse audit report
|
||||
CRITICAL=$(jq -r '.metadata.vulnerabilities.critical // 0' audit-report.json)
|
||||
HIGH=$(jq -r '.metadata.vulnerabilities.high // 0' audit-report.json)
|
||||
MODERATE=$(jq -r '.metadata.vulnerabilities.moderate // 0' audit-report.json)
|
||||
LOW=$(jq -r '.metadata.vulnerabilities.low // 0' audit-report.json)
|
||||
|
||||
echo "| Severity | Count |" >> $GITHUB_STEP_SUMMARY
|
||||
echo "|----------|-------|" >> $GITHUB_STEP_SUMMARY
|
||||
echo "| Critical | $CRITICAL |" >> $GITHUB_STEP_SUMMARY
|
||||
echo "| High | $HIGH |" >> $GITHUB_STEP_SUMMARY
|
||||
echo "| Moderate | $MODERATE |" >> $GITHUB_STEP_SUMMARY
|
||||
echo "| Low | $LOW |" >> $GITHUB_STEP_SUMMARY
|
||||
|
||||
if [ "$CRITICAL" -gt 0 ] || [ "$HIGH" -gt 0 ]; then
|
||||
echo "" >> $GITHUB_STEP_SUMMARY
|
||||
echo "⚠️ **Action Required**: Critical or high severity vulnerabilities detected!" >> $GITHUB_STEP_SUMMARY
|
||||
fi
|
||||
fi
|
||||
|
||||
- name: Upload security audit
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: security-audit
|
||||
path: audit-report.json
|
||||
retention-days: 90
|
||||
if: always()
|
||||
|
||||
- name: Create issue for critical vulnerabilities
|
||||
if: always()
|
||||
uses: actions/github-script@v7
|
||||
with:
|
||||
script: |
|
||||
const fs = require('fs');
|
||||
|
||||
if (!fs.existsSync('audit-report.json')) {
|
||||
console.log('No audit report found');
|
||||
return;
|
||||
}
|
||||
|
||||
const auditData = JSON.parse(fs.readFileSync('audit-report.json', 'utf8'));
|
||||
const critical = auditData.metadata?.vulnerabilities?.critical || 0;
|
||||
const high = auditData.metadata?.vulnerabilities?.high || 0;
|
||||
|
||||
if (critical > 0 || high > 0) {
|
||||
const issueTitle = `🚨 Security Alert: ${critical} Critical, ${high} High Severity Vulnerabilities`;
|
||||
const issueBody = `
|
||||
## Security Vulnerability Report
|
||||
|
||||
**Date**: ${new Date().toISOString()}
|
||||
**Workflow Run**: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
|
||||
### Summary
|
||||
- Critical: ${critical}
|
||||
- High: ${high}
|
||||
- Moderate: ${auditData.metadata?.vulnerabilities?.moderate || 0}
|
||||
- Low: ${auditData.metadata?.vulnerabilities?.low || 0}
|
||||
|
||||
### Action Required
|
||||
Please review the security audit report and update affected dependencies.
|
||||
|
||||
\`\`\`bash
|
||||
pnpm audit
|
||||
pnpm audit fix
|
||||
\`\`\`
|
||||
|
||||
**Note**: This issue was automatically created by the dependency update workflow.
|
||||
`;
|
||||
|
||||
// Check if similar issue exists
|
||||
const { data: existingIssues } = await github.rest.issues.listForRepo({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
state: 'open',
|
||||
labels: 'security,automated'
|
||||
});
|
||||
|
||||
const hasExistingIssue = existingIssues.some(issue =>
|
||||
issue.title.includes('Security Alert')
|
||||
);
|
||||
|
||||
if (!hasExistingIssue) {
|
||||
await github.rest.issues.create({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
title: issueTitle,
|
||||
body: issueBody,
|
||||
labels: ['security', 'automated', 'high-priority']
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
# Update lock file
|
||||
update-lockfile:
|
||||
name: Update Lock File
|
||||
runs-on: ubuntu-latest
|
||||
needs: [check-outdated, security-audit]
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
token: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Setup pnpm
|
||||
uses: pnpm/action-setup@v4
|
||||
with:
|
||||
version: ${{ env.PNPM_VERSION }}
|
||||
|
||||
- name: Setup Node.js
|
||||
uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: ${{ env.NODE_VERSION }}
|
||||
cache: 'pnpm'
|
||||
|
||||
- name: Update lock file
|
||||
run: |
|
||||
# Update lock file without changing package.json versions
|
||||
pnpm install --no-frozen-lockfile
|
||||
|
||||
- name: Check for changes
|
||||
id: changes
|
||||
run: |
|
||||
if git diff --quiet pnpm-lock.yaml; then
|
||||
echo "has-changes=false" >> $GITHUB_OUTPUT
|
||||
else
|
||||
echo "has-changes=true" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
|
||||
- name: Create Pull Request
|
||||
if: steps.changes.outputs.has-changes == 'true'
|
||||
uses: peter-evans/create-pull-request@v6
|
||||
with:
|
||||
token: ${{ secrets.GITHUB_TOKEN }}
|
||||
commit-message: "chore: update pnpm-lock.yaml"
|
||||
title: "chore: Update dependency lock file"
|
||||
body: |
|
||||
## Dependency Lock File Update
|
||||
|
||||
This PR updates the `pnpm-lock.yaml` file to reflect the latest compatible versions.
|
||||
|
||||
### Changes
|
||||
- Updated lock file to latest compatible versions
|
||||
- No breaking changes to package.json
|
||||
|
||||
### Testing
|
||||
- [ ] All CI checks pass
|
||||
- [ ] Manual testing completed
|
||||
|
||||
**Note**: This PR was automatically created by the dependency update workflow.
|
||||
branch: chore/update-lockfile
|
||||
labels: |
|
||||
dependencies
|
||||
automated
|
||||
assignees: wuesteon
|
||||
103
.github/workflows/staging-config-check.yml
vendored
103
.github/workflows/staging-config-check.yml
vendored
|
|
@ -1,103 +0,0 @@
|
|||
name: Staging Config Check
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
paths:
|
||||
- 'docker-compose.staging.yml'
|
||||
- 'docker/caddy/Caddyfile.staging'
|
||||
|
||||
jobs:
|
||||
check-staging-urls:
|
||||
name: Validate Staging URLs
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Check for HTTP IP addresses in _CLIENT URLs
|
||||
run: |
|
||||
echo "Checking docker-compose.staging.yml for HTTP IP addresses..."
|
||||
|
||||
# Check that no _CLIENT URLs use HTTP IP addresses
|
||||
if grep -E '_CLIENT:.*http://[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' docker-compose.staging.yml; then
|
||||
echo ""
|
||||
echo "::error::Found HTTP IP addresses in _CLIENT URLs!"
|
||||
echo "All _CLIENT URLs must use HTTPS staging domains (e.g., https://auth.staging.manacore.ai)"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "No HTTP IP addresses found in _CLIENT URLs"
|
||||
|
||||
- name: Check for non-HTTPS external URLs
|
||||
run: |
|
||||
echo "Checking for non-HTTPS external URLs in _CLIENT variables..."
|
||||
|
||||
# Check that _CLIENT URLs use HTTPS (excluding localhost for dev)
|
||||
VIOLATIONS=$(grep -E '_CLIENT:.*http://' docker-compose.staging.yml | grep -v localhost || true)
|
||||
|
||||
if [ -n "$VIOLATIONS" ]; then
|
||||
echo ""
|
||||
echo "::error::Found non-HTTPS URLs in _CLIENT variables!"
|
||||
echo "$VIOLATIONS"
|
||||
echo ""
|
||||
echo "All _CLIENT URLs must use HTTPS for staging domains."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "All _CLIENT URLs use HTTPS"
|
||||
|
||||
- name: Verify required HTTPS domains
|
||||
run: |
|
||||
echo "Verifying required HTTPS staging domains are configured..."
|
||||
|
||||
REQUIRED_DOMAINS=(
|
||||
"https://auth.staging.manacore.ai"
|
||||
"https://staging.manacore.ai"
|
||||
)
|
||||
|
||||
MISSING=0
|
||||
for domain in "${REQUIRED_DOMAINS[@]}"; do
|
||||
if ! grep -q "$domain" docker-compose.staging.yml; then
|
||||
echo "::warning::Missing required domain: $domain"
|
||||
MISSING=1
|
||||
fi
|
||||
done
|
||||
|
||||
if [ $MISSING -eq 1 ]; then
|
||||
echo ""
|
||||
echo "::warning::Some required staging domains are not configured. Please verify this is intentional."
|
||||
fi
|
||||
|
||||
echo "Domain verification complete"
|
||||
|
||||
- name: Check CORS origins include HTTPS
|
||||
run: |
|
||||
echo "Checking CORS_ORIGINS for HTTPS staging domains..."
|
||||
|
||||
# Extract CORS_ORIGINS lines and check they include staging domains
|
||||
CORS_LINES=$(grep "CORS_ORIGINS:" docker-compose.staging.yml || true)
|
||||
|
||||
if [ -n "$CORS_LINES" ]; then
|
||||
# Check if any CORS line has HTTP staging domains (not localhost)
|
||||
HTTP_CORS=$(echo "$CORS_LINES" | grep -E 'http://[a-z]+\.staging\.manacore\.ai' || true)
|
||||
|
||||
if [ -n "$HTTP_CORS" ]; then
|
||||
echo ""
|
||||
echo "::error::Found HTTP (non-HTTPS) staging domains in CORS_ORIGINS!"
|
||||
echo "$HTTP_CORS"
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
echo "CORS origins are correctly configured"
|
||||
|
||||
- name: Summary
|
||||
run: |
|
||||
echo ""
|
||||
echo "======================================"
|
||||
echo "Staging Configuration Check: PASSED"
|
||||
echo "======================================"
|
||||
echo ""
|
||||
echo "All checks passed:"
|
||||
echo " - No HTTP IP addresses in _CLIENT URLs"
|
||||
echo " - All external _CLIENT URLs use HTTPS"
|
||||
echo " - CORS origins correctly configured"
|
||||
180
.github/workflows/test-coverage.yml.bak
vendored
180
.github/workflows/test-coverage.yml.bak
vendored
|
|
@ -1,180 +0,0 @@
|
|||
name: Test Coverage
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches:
|
||||
- main
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
schedule:
|
||||
# Run weekly on Sundays at 00:00 UTC
|
||||
- cron: '0 0 * * 0'
|
||||
workflow_dispatch:
|
||||
|
||||
env:
|
||||
NODE_VERSION: '20'
|
||||
PNPM_VERSION: '9.15.0'
|
||||
TURBO_TOKEN: ${{ secrets.TURBO_TOKEN }}
|
||||
TURBO_TEAM: ${{ secrets.TURBO_TEAM }}
|
||||
|
||||
jobs:
|
||||
test-coverage:
|
||||
name: Test Coverage
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Setup pnpm
|
||||
uses: pnpm/action-setup@v4
|
||||
with:
|
||||
version: ${{ env.PNPM_VERSION }}
|
||||
|
||||
- name: Setup Node.js
|
||||
uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: ${{ env.NODE_VERSION }}
|
||||
cache: 'pnpm'
|
||||
|
||||
- name: Install dependencies
|
||||
run: pnpm install --frozen-lockfile
|
||||
|
||||
- name: Build shared packages
|
||||
run: pnpm run build:packages
|
||||
|
||||
- name: Run tests with coverage
|
||||
run: pnpm run test --coverage || echo "Some tests failed"
|
||||
continue-on-error: true
|
||||
|
||||
- name: Collect coverage reports
|
||||
run: |
|
||||
# Find all coverage directories
|
||||
find . -type d -name coverage \( -path "*/apps/*/apps/*" -o -path "*/services/*" \) > coverage_dirs.txt
|
||||
|
||||
# Create combined coverage directory
|
||||
mkdir -p coverage-combined
|
||||
|
||||
# Copy all coverage files
|
||||
while IFS= read -r dir; do
|
||||
if [ -f "$dir/coverage-final.json" ]; then
|
||||
PROJECT=$(echo $dir | sed 's|./apps/||' | sed 's|./services/||' | sed 's|/coverage||' | tr '/' '-')
|
||||
cp "$dir/coverage-final.json" "coverage-combined/coverage-$PROJECT.json"
|
||||
fi
|
||||
done < coverage_dirs.txt
|
||||
|
||||
- name: Generate coverage summary
|
||||
run: |
|
||||
echo "## Test Coverage Summary" >> $GITHUB_STEP_SUMMARY
|
||||
echo "" >> $GITHUB_STEP_SUMMARY
|
||||
|
||||
# Find and parse coverage summaries
|
||||
find . -type f -name "coverage-summary.json" | while read -r file; do
|
||||
PROJECT=$(dirname $file | sed 's|./apps/||' | sed 's|./services/||' | sed 's|/coverage||')
|
||||
|
||||
if [ -f "$file" ]; then
|
||||
LINES=$(jq -r '.total.lines.pct' "$file" 2>/dev/null || echo "0")
|
||||
STATEMENTS=$(jq -r '.total.statements.pct' "$file" 2>/dev/null || echo "0")
|
||||
FUNCTIONS=$(jq -r '.total.functions.pct' "$file" 2>/dev/null || echo "0")
|
||||
BRANCHES=$(jq -r '.total.branches.pct' "$file" 2>/dev/null || echo "0")
|
||||
|
||||
echo "### $PROJECT" >> $GITHUB_STEP_SUMMARY
|
||||
echo "| Metric | Coverage |" >> $GITHUB_STEP_SUMMARY
|
||||
echo "|--------|----------|" >> $GITHUB_STEP_SUMMARY
|
||||
echo "| Lines | ${LINES}% |" >> $GITHUB_STEP_SUMMARY
|
||||
echo "| Statements | ${STATEMENTS}% |" >> $GITHUB_STEP_SUMMARY
|
||||
echo "| Functions | ${FUNCTIONS}% |" >> $GITHUB_STEP_SUMMARY
|
||||
echo "| Branches | ${BRANCHES}% |" >> $GITHUB_STEP_SUMMARY
|
||||
echo "" >> $GITHUB_STEP_SUMMARY
|
||||
fi
|
||||
done
|
||||
|
||||
- name: Archive coverage reports
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: coverage-reports
|
||||
path: |
|
||||
apps/**/coverage
|
||||
services/**/coverage
|
||||
coverage-combined
|
||||
retention-days: 30
|
||||
if-no-files-found: warn
|
||||
|
||||
- name: Check coverage thresholds
|
||||
run: |
|
||||
echo "Checking coverage thresholds..."
|
||||
|
||||
# Set minimum coverage threshold
|
||||
MINIMUM_COVERAGE=50 # Start with 50%, increase gradually
|
||||
|
||||
# Check each project's coverage
|
||||
find . -type f -name "coverage-summary.json" | while read -r file; do
|
||||
PROJECT=$(dirname $file | sed 's|./apps/||' | sed 's|./services/||' | sed 's|/coverage||')
|
||||
LINES=$(jq -r '.total.lines.pct' "$file" 2>/dev/null || echo "0")
|
||||
|
||||
echo "Checking $PROJECT: ${LINES}% coverage"
|
||||
|
||||
# Convert to integer for comparison
|
||||
LINES_INT=$(printf "%.0f" $LINES)
|
||||
|
||||
if [ "$LINES_INT" -lt "$MINIMUM_COVERAGE" ]; then
|
||||
echo "⚠️ Warning: $PROJECT coverage (${LINES}%) is below minimum threshold (${MINIMUM_COVERAGE}%)"
|
||||
else
|
||||
echo "✅ $PROJECT meets coverage threshold"
|
||||
fi
|
||||
done
|
||||
|
||||
# Generate coverage badge
|
||||
coverage-badge:
|
||||
name: Update Coverage Badge
|
||||
runs-on: ubuntu-latest
|
||||
needs: test-coverage
|
||||
if: github.ref == 'refs/heads/main'
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Download coverage reports
|
||||
uses: actions/download-artifact@v4
|
||||
continue-on-error: true
|
||||
id: download-coverage
|
||||
with:
|
||||
name: coverage-reports
|
||||
path: coverage-reports
|
||||
|
||||
- name: Create coverage badge
|
||||
if: steps.download-coverage.outcome == 'success'
|
||||
run: |
|
||||
# Calculate overall coverage
|
||||
TOTAL_LINES=0
|
||||
COVERED_LINES=0
|
||||
|
||||
find coverage-reports -type f -name "coverage-summary.json" | while read -r file; do
|
||||
LINES=$(jq -r '.total.lines.total' "$file" 2>/dev/null || echo "0")
|
||||
COVERED=$(jq -r '.total.lines.covered' "$file" 2>/dev/null || echo "0")
|
||||
|
||||
TOTAL_LINES=$((TOTAL_LINES + LINES))
|
||||
COVERED_LINES=$((COVERED_LINES + COVERED))
|
||||
done
|
||||
|
||||
if [ "$TOTAL_LINES" -gt 0 ]; then
|
||||
COVERAGE=$(echo "scale=2; $COVERED_LINES * 100 / $TOTAL_LINES" | bc)
|
||||
echo "Overall coverage: ${COVERAGE}%"
|
||||
echo "COVERAGE=${COVERAGE}" >> $GITHUB_ENV
|
||||
else
|
||||
echo "No coverage data found"
|
||||
echo "COVERAGE=0" >> $GITHUB_ENV
|
||||
fi
|
||||
|
||||
- name: Update README badge
|
||||
if: steps.download-coverage.outcome == 'success'
|
||||
run: |
|
||||
echo "Coverage badge data ready: ${{ env.COVERAGE }}%"
|
||||
# This would update a badge in the README or create a gist
|
||||
# Implementation depends on chosen badge service (shields.io, codecov, etc.)
|
||||
|
||||
- name: Skip badge update
|
||||
if: steps.download-coverage.outcome != 'success'
|
||||
run: echo "No coverage reports available - skipping badge update"
|
||||
389
.github/workflows/test.yml.bak
vendored
389
.github/workflows/test.yml.bak
vendored
|
|
@ -1,389 +0,0 @@
|
|||
name: Test Suite
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches: [main, develop]
|
||||
push:
|
||||
branches: [main, develop]
|
||||
workflow_dispatch:
|
||||
|
||||
# Cancel in-progress runs for same PR/branch
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
env:
|
||||
NODE_VERSION: '20'
|
||||
PNPM_VERSION: '9.15.0'
|
||||
|
||||
jobs:
|
||||
# ====================
|
||||
# 1. TEST BACKENDS
|
||||
# ====================
|
||||
test-backends:
|
||||
name: Test Backend - ${{ matrix.project }}
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 10
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
project:
|
||||
- maerchenzauber
|
||||
- manadeck
|
||||
- chat
|
||||
- nutriphi
|
||||
- picture
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Setup pnpm
|
||||
uses: pnpm/action-setup@v2
|
||||
with:
|
||||
version: ${{ env.PNPM_VERSION }}
|
||||
|
||||
- name: Setup Node.js
|
||||
uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: ${{ env.NODE_VERSION }}
|
||||
cache: 'pnpm'
|
||||
|
||||
- name: Install dependencies
|
||||
run: pnpm install --frozen-lockfile
|
||||
|
||||
- name: Build shared packages
|
||||
run: pnpm run build:packages
|
||||
|
||||
- name: Type check
|
||||
run: pnpm --filter @${{ matrix.project }}/backend type-check
|
||||
continue-on-error: true
|
||||
|
||||
- name: Run tests with coverage
|
||||
run: pnpm --filter @${{ matrix.project }}/backend test:cov
|
||||
env:
|
||||
NODE_ENV: test
|
||||
|
||||
- name: Check coverage thresholds
|
||||
run: |
|
||||
echo "Checking coverage meets 80% threshold..."
|
||||
# Jest/Vitest will fail if thresholds aren't met
|
||||
|
||||
# ====================
|
||||
# 2. TEST MOBILE APPS
|
||||
# ====================
|
||||
test-mobile:
|
||||
name: Test Mobile - ${{ matrix.project }}
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 15
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
project:
|
||||
- maerchenzauber
|
||||
- memoro
|
||||
- picture
|
||||
- chat
|
||||
- manacore
|
||||
- manadeck
|
||||
- nutriphi
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Setup pnpm
|
||||
uses: pnpm/action-setup@v2
|
||||
with:
|
||||
version: ${{ env.PNPM_VERSION }}
|
||||
|
||||
- name: Setup Node.js
|
||||
uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: ${{ env.NODE_VERSION }}
|
||||
cache: 'pnpm'
|
||||
|
||||
- name: Install dependencies
|
||||
run: pnpm install --frozen-lockfile
|
||||
|
||||
- name: Build shared packages
|
||||
run: pnpm run build:packages
|
||||
|
||||
- name: Type check
|
||||
run: pnpm --filter @${{ matrix.project }}/mobile type-check
|
||||
continue-on-error: true
|
||||
|
||||
- name: Run tests with coverage
|
||||
run: pnpm --filter @${{ matrix.project }}/mobile test -- --coverage --watchAll=false --ci
|
||||
env:
|
||||
NODE_ENV: test
|
||||
|
||||
# ====================
|
||||
# 3. TEST WEB APPS
|
||||
# ====================
|
||||
test-web:
|
||||
name: Test Web - ${{ matrix.project }}
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 15
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
project:
|
||||
- maerchenzauber
|
||||
- manacore
|
||||
- memoro
|
||||
- picture
|
||||
- uload
|
||||
- chat
|
||||
- manadeck
|
||||
- nutriphi
|
||||
- news
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Setup pnpm
|
||||
uses: pnpm/action-setup@v2
|
||||
with:
|
||||
version: ${{ env.PNPM_VERSION }}
|
||||
|
||||
- name: Setup Node.js
|
||||
uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: ${{ env.NODE_VERSION }}
|
||||
cache: 'pnpm'
|
||||
|
||||
- name: Install dependencies
|
||||
run: pnpm install --frozen-lockfile
|
||||
|
||||
- name: Build shared packages
|
||||
run: pnpm run build:packages
|
||||
|
||||
- name: Type check
|
||||
run: pnpm --filter @${{ matrix.project }}/web check
|
||||
continue-on-error: true
|
||||
|
||||
- name: Run unit tests with coverage
|
||||
run: pnpm --filter @${{ matrix.project }}/web test:unit -- --coverage --run
|
||||
env:
|
||||
NODE_ENV: test
|
||||
|
||||
# ====================
|
||||
# 4. E2E TESTS (WEB)
|
||||
# ====================
|
||||
test-e2e-web:
|
||||
name: E2E Web - ${{ matrix.project }}
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 20
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
project:
|
||||
- uload
|
||||
# Add other projects with E2E tests
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Setup pnpm
|
||||
uses: pnpm/action-setup@v2
|
||||
with:
|
||||
version: ${{ env.PNPM_VERSION }}
|
||||
|
||||
- name: Setup Node.js
|
||||
uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: ${{ env.NODE_VERSION }}
|
||||
cache: 'pnpm'
|
||||
|
||||
- name: Install dependencies
|
||||
run: pnpm install --frozen-lockfile
|
||||
|
||||
- name: Build shared packages
|
||||
run: pnpm run build:packages
|
||||
|
||||
- name: Install Playwright browsers
|
||||
run: pnpm --filter @${{ matrix.project }}/web exec playwright install --with-deps chromium
|
||||
|
||||
- name: Build application
|
||||
run: pnpm --filter @${{ matrix.project }}/web build
|
||||
|
||||
- name: Run E2E tests
|
||||
run: pnpm --filter @${{ matrix.project }}/web test:e2e
|
||||
env:
|
||||
CI: true
|
||||
|
||||
- name: Upload Playwright report
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: playwright-report-${{ matrix.project }}
|
||||
path: ./apps/${{ matrix.project }}/apps/web/playwright-report/
|
||||
retention-days: 7
|
||||
|
||||
# ====================
|
||||
# 5. TEST SHARED PACKAGES
|
||||
# ====================
|
||||
test-shared-packages:
|
||||
name: Test Shared Packages
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 10
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Setup pnpm
|
||||
uses: pnpm/action-setup@v2
|
||||
with:
|
||||
version: ${{ env.PNPM_VERSION }}
|
||||
|
||||
- name: Setup Node.js
|
||||
uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: ${{ env.NODE_VERSION }}
|
||||
cache: 'pnpm'
|
||||
|
||||
- name: Install dependencies
|
||||
run: pnpm install --frozen-lockfile
|
||||
|
||||
- name: Build shared packages
|
||||
run: pnpm run build:packages
|
||||
|
||||
- name: Type check shared packages
|
||||
run: pnpm --filter './packages/*' type-check
|
||||
continue-on-error: true
|
||||
|
||||
- name: Run tests with coverage
|
||||
run: pnpm --filter './packages/*' test -- --coverage --run
|
||||
continue-on-error: true
|
||||
env:
|
||||
NODE_ENV: test
|
||||
|
||||
# ====================
|
||||
# 6. LINT & FORMAT CHECK
|
||||
# ====================
|
||||
lint-and-format:
|
||||
name: Lint & Format
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 10
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Setup pnpm
|
||||
uses: pnpm/action-setup@v2
|
||||
with:
|
||||
version: ${{ env.PNPM_VERSION }}
|
||||
|
||||
- name: Setup Node.js
|
||||
uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: ${{ env.NODE_VERSION }}
|
||||
cache: 'pnpm'
|
||||
|
||||
- name: Install dependencies
|
||||
run: pnpm install --frozen-lockfile
|
||||
|
||||
- name: Build shared packages
|
||||
run: pnpm run build:packages
|
||||
|
||||
- name: Check formatting
|
||||
run: pnpm run format:check
|
||||
|
||||
- name: Run linters
|
||||
run: pnpm run lint
|
||||
continue-on-error: true
|
||||
|
||||
# ====================
|
||||
# 7. COVERAGE REPORT
|
||||
# ====================
|
||||
coverage-report:
|
||||
name: Generate Coverage Report
|
||||
needs:
|
||||
- test-backends
|
||||
- test-mobile
|
||||
- test-web
|
||||
- test-shared-packages
|
||||
runs-on: ubuntu-latest
|
||||
if: always()
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Download all coverage reports
|
||||
uses: actions/download-artifact@v4
|
||||
continue-on-error: true
|
||||
|
||||
- name: Generate coverage summary
|
||||
run: |
|
||||
echo "## 📊 Test Coverage Summary" >> $GITHUB_STEP_SUMMARY
|
||||
echo "" >> $GITHUB_STEP_SUMMARY
|
||||
echo "### Jobs Status" >> $GITHUB_STEP_SUMMARY
|
||||
echo "- Backend Tests: ${{ needs.test-backends.result }}" >> $GITHUB_STEP_SUMMARY
|
||||
echo "- Mobile Tests: ${{ needs.test-mobile.result }}" >> $GITHUB_STEP_SUMMARY
|
||||
echo "- Web Tests: ${{ needs.test-web.result }}" >> $GITHUB_STEP_SUMMARY
|
||||
echo "- Shared Packages Tests: ${{ needs.test-shared-packages.result }}" >> $GITHUB_STEP_SUMMARY
|
||||
|
||||
# ====================
|
||||
# 8. TEST STATUS CHECK
|
||||
# ====================
|
||||
test-status:
|
||||
name: All Tests Status
|
||||
needs:
|
||||
- test-backends
|
||||
- test-mobile
|
||||
- test-web
|
||||
- test-shared-packages
|
||||
- lint-and-format
|
||||
runs-on: ubuntu-latest
|
||||
if: always()
|
||||
|
||||
steps:
|
||||
- name: Check test results
|
||||
run: |
|
||||
if [ "${{ needs.test-backends.result }}" != "success" ] || \
|
||||
[ "${{ needs.test-mobile.result }}" != "success" ] || \
|
||||
[ "${{ needs.test-web.result }}" != "success" ] || \
|
||||
[ "${{ needs.test-shared-packages.result }}" != "success" ]; then
|
||||
echo "❌ Some tests failed"
|
||||
exit 1
|
||||
fi
|
||||
echo "✅ All tests passed"
|
||||
|
||||
- name: Post PR comment
|
||||
if: github.event_name == 'pull_request'
|
||||
uses: actions/github-script@v7
|
||||
with:
|
||||
script: |
|
||||
const status = '${{ needs.test-status.result }}' === 'success' ? '✅' : '❌';
|
||||
const body = `## ${status} Test Suite Results
|
||||
|
||||
**Status**: ${status === '✅' ? 'All tests passed!' : 'Some tests failed'}
|
||||
|
||||
### Test Coverage
|
||||
- Backend: ${{ needs.test-backends.result }}
|
||||
- Mobile: ${{ needs.test-mobile.result }}
|
||||
- Web: ${{ needs.test-web.result }}
|
||||
- Shared Packages: ${{ needs.test-shared-packages.result }}
|
||||
- Lint & Format: ${{ needs.lint-and-format.result }}
|
||||
|
||||
View detailed results in the [Actions tab](https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }})
|
||||
`;
|
||||
|
||||
github.rest.issues.createComment({
|
||||
issue_number: context.issue.number,
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
body
|
||||
});
|
||||
28
CLAUDE.md
28
CLAUDE.md
|
|
@ -549,16 +549,34 @@ npx wrangler pages project add-domain chat-landing chat.manacore.app
|
|||
|
||||
## Server Access
|
||||
|
||||
### Hetzner Staging Server
|
||||
### Mac Mini Production Server
|
||||
|
||||
SSH access for deployment troubleshooting, log inspection, and service management:
|
||||
The production environment runs on a Mac Mini, accessible via Cloudflare Tunnel.
|
||||
|
||||
**Domain:** mana.how
|
||||
**SSH:** `ssh mana-server` (requires cloudflared and SSH config)
|
||||
|
||||
```bash
|
||||
ssh -i ~/.ssh/hetzner_deploy_key deploy@46.224.108.214
|
||||
# SSH config (~/.ssh/config)
|
||||
Host mana-server
|
||||
HostName mac-mini.mana.how
|
||||
User till
|
||||
ProxyCommand /opt/homebrew/bin/cloudflared access ssh --hostname %h
|
||||
```
|
||||
|
||||
**User:** `deploy`
|
||||
**Key:** `~/.ssh/hetzner_deploy_key`
|
||||
#### Useful Commands
|
||||
|
||||
```bash
|
||||
ssh mana-server # Connect to server
|
||||
cd ~/projects/manacore-monorepo
|
||||
|
||||
./scripts/mac-mini/status.sh # Check all services
|
||||
./scripts/mac-mini/deploy.sh # Pull & restart containers
|
||||
./scripts/mac-mini/health-check.sh # Run health checks
|
||||
docker compose -f docker-compose.macmini.yml logs -f # View logs
|
||||
```
|
||||
|
||||
For detailed server documentation, see **[docs/MAC_MINI_SERVER.md](docs/MAC_MINI_SERVER.md)**.
|
||||
|
||||
## Adding Dependencies
|
||||
|
||||
|
|
|
|||
|
|
@ -723,6 +723,32 @@ services:
|
|||
retries: 3
|
||||
start_period: 40s
|
||||
|
||||
# ============================================
|
||||
# Auto-Update (Watchtower)
|
||||
# ============================================
|
||||
|
||||
watchtower:
|
||||
image: containrrr/watchtower
|
||||
container_name: manacore-watchtower
|
||||
restart: always
|
||||
volumes:
|
||||
- /var/run/docker.sock:/var/run/docker.sock
|
||||
- ~/.docker/config.json:/config.json:ro
|
||||
environment:
|
||||
TZ: Europe/Berlin
|
||||
WATCHTOWER_POLL_INTERVAL: 300 # Check every 5 minutes
|
||||
WATCHTOWER_CLEANUP: "true" # Remove old images
|
||||
WATCHTOWER_INCLUDE_STOPPED: "false" # Only update running containers
|
||||
WATCHTOWER_NO_STARTUP_MESSAGE: "false" # Log startup message
|
||||
WATCHTOWER_NOTIFICATIONS: shoutrrr
|
||||
WATCHTOWER_NOTIFICATION_URL: ${WATCHTOWER_NOTIFICATION_URL:-} # Optional: telegram://token@telegram?chats=chatid
|
||||
WATCHTOWER_NOTIFICATION_TEMPLATE: |
|
||||
{{- if .Updated -}}
|
||||
🚀 *ManaCore Update*
|
||||
Updated: {{range .Updated}}{{.Name}} {{end}}
|
||||
{{- end -}}
|
||||
command: --label-enable=false # Update all containers (not just labeled ones)
|
||||
|
||||
# ============================================
|
||||
# Volumes
|
||||
# ============================================
|
||||
|
|
|
|||
|
|
@ -1,429 +0,0 @@
|
|||
# ManaCore Production Configuration
|
||||
# Domain: mana.how
|
||||
# Server: 46.224.108.214
|
||||
#
|
||||
# This replaces the staging environment as production.
|
||||
# Apps: mana-core-auth, manacore-web, chat, todo, calendar, clock
|
||||
|
||||
services:
|
||||
# ============================================
|
||||
# Infrastructure Services
|
||||
# ============================================
|
||||
|
||||
postgres:
|
||||
image: postgres:16-alpine
|
||||
container_name: manacore-postgres-prod
|
||||
restart: always
|
||||
environment:
|
||||
POSTGRES_DB: ${POSTGRES_DB:-manacore}
|
||||
POSTGRES_USER: ${POSTGRES_USER:-postgres}
|
||||
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
|
||||
volumes:
|
||||
- postgres_data:/var/lib/postgresql/data
|
||||
ports:
|
||||
- "127.0.0.1:5432:5432"
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-postgres}"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
networks:
|
||||
- manacore-network
|
||||
|
||||
redis:
|
||||
image: redis:7-alpine
|
||||
container_name: manacore-redis-prod
|
||||
restart: always
|
||||
command: redis-server --requirepass ${REDIS_PASSWORD}
|
||||
volumes:
|
||||
- redis_data:/data
|
||||
ports:
|
||||
- "127.0.0.1:6379:6379"
|
||||
healthcheck:
|
||||
test: ["CMD", "redis-cli", "--raw", "incr", "ping"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
networks:
|
||||
- manacore-network
|
||||
|
||||
# ============================================
|
||||
# Auth Service
|
||||
# ============================================
|
||||
|
||||
mana-core-auth:
|
||||
image: ${DOCKER_REGISTRY:-ghcr.io/memo-2023}/mana-core-auth:${AUTH_VERSION:-latest}
|
||||
container_name: mana-core-auth-prod
|
||||
restart: always
|
||||
depends_on:
|
||||
postgres:
|
||||
condition: service_healthy
|
||||
redis:
|
||||
condition: service_healthy
|
||||
environment:
|
||||
NODE_ENV: production
|
||||
PORT: 3001
|
||||
DATABASE_URL: postgresql://${POSTGRES_USER:-postgres}:${POSTGRES_PASSWORD}@postgres:5432/manacore_auth
|
||||
REDIS_HOST: redis
|
||||
REDIS_PORT: 6379
|
||||
REDIS_PASSWORD: ${REDIS_PASSWORD}
|
||||
JWT_SECRET: ${JWT_SECRET}
|
||||
JWT_PUBLIC_KEY: ${JWT_PUBLIC_KEY}
|
||||
JWT_PRIVATE_KEY: ${JWT_PRIVATE_KEY}
|
||||
# CORS - Production domains only
|
||||
CORS_ORIGINS: https://mana.how,https://chat.mana.how,https://todo.mana.how,https://calendar.mana.how,https://clock.mana.how
|
||||
ports:
|
||||
- "3001:3001"
|
||||
healthcheck:
|
||||
test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:3001/api/v1/health"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
start_period: 40s
|
||||
networks:
|
||||
- manacore-network
|
||||
logging:
|
||||
driver: "json-file"
|
||||
options:
|
||||
max-size: "50m"
|
||||
max-file: "5"
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
cpus: '1'
|
||||
memory: 512M
|
||||
|
||||
# ============================================
|
||||
# ManaCore Dashboard
|
||||
# ============================================
|
||||
|
||||
manacore-web:
|
||||
image: ${DOCKER_REGISTRY:-ghcr.io/memo-2023}/manacore-web:${MANACORE_WEB_VERSION:-latest}
|
||||
container_name: manacore-web-prod
|
||||
restart: always
|
||||
depends_on:
|
||||
mana-core-auth:
|
||||
condition: service_healthy
|
||||
environment:
|
||||
NODE_ENV: production
|
||||
PORT: 5173
|
||||
# Auth URLs
|
||||
PUBLIC_MANA_CORE_AUTH_URL: http://mana-core-auth:3001
|
||||
PUBLIC_MANA_CORE_AUTH_URL_CLIENT: https://auth.mana.how
|
||||
# Backend URLs for dashboard widgets
|
||||
PUBLIC_TODO_API_URL: http://todo-backend:3018
|
||||
PUBLIC_TODO_API_URL_CLIENT: https://todo-api.mana.how
|
||||
PUBLIC_CALENDAR_API_URL: http://calendar-backend:3016
|
||||
PUBLIC_CALENDAR_API_URL_CLIENT: https://calendar-api.mana.how
|
||||
PUBLIC_CLOCK_API_URL: http://clock-backend:3017
|
||||
PUBLIC_CLOCK_API_URL_CLIENT: https://clock-api.mana.how
|
||||
ports:
|
||||
- "5173:5173"
|
||||
healthcheck:
|
||||
test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:5173/health"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
start_period: 40s
|
||||
networks:
|
||||
- manacore-network
|
||||
logging:
|
||||
driver: "json-file"
|
||||
options:
|
||||
max-size: "50m"
|
||||
max-file: "5"
|
||||
|
||||
# ============================================
|
||||
# Chat App
|
||||
# ============================================
|
||||
|
||||
chat-backend:
|
||||
image: ${DOCKER_REGISTRY:-ghcr.io/memo-2023}/chat-backend:${CHAT_VERSION:-latest}
|
||||
container_name: chat-backend-prod
|
||||
restart: always
|
||||
depends_on:
|
||||
mana-core-auth:
|
||||
condition: service_healthy
|
||||
postgres:
|
||||
condition: service_healthy
|
||||
environment:
|
||||
NODE_ENV: production
|
||||
PORT: 3002
|
||||
DATABASE_URL: postgresql://${POSTGRES_USER:-postgres}:${POSTGRES_PASSWORD}@postgres:5432/chat
|
||||
MANA_CORE_AUTH_URL: http://mana-core-auth:3001
|
||||
SUPABASE_URL: ${SUPABASE_URL}
|
||||
SUPABASE_SERVICE_KEY: ${SUPABASE_SERVICE_ROLE_KEY}
|
||||
AZURE_OPENAI_ENDPOINT: ${AZURE_OPENAI_ENDPOINT}
|
||||
AZURE_OPENAI_API_KEY: ${AZURE_OPENAI_API_KEY}
|
||||
AZURE_OPENAI_API_VERSION: ${AZURE_OPENAI_API_VERSION:-2024-12-01-preview}
|
||||
CORS_ORIGINS: https://chat.mana.how,https://mana.how
|
||||
ports:
|
||||
- "3002:3002"
|
||||
healthcheck:
|
||||
test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:3002/api/v1/health"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
start_period: 40s
|
||||
networks:
|
||||
- manacore-network
|
||||
logging:
|
||||
driver: "json-file"
|
||||
options:
|
||||
max-size: "50m"
|
||||
max-file: "5"
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
cpus: '2'
|
||||
memory: 1G
|
||||
|
||||
chat-web:
|
||||
image: ${DOCKER_REGISTRY:-ghcr.io/memo-2023}/chat-web:${CHAT_WEB_VERSION:-latest}
|
||||
container_name: chat-web-prod
|
||||
restart: always
|
||||
depends_on:
|
||||
chat-backend:
|
||||
condition: service_healthy
|
||||
environment:
|
||||
NODE_ENV: production
|
||||
PORT: 3000
|
||||
PUBLIC_BACKEND_URL: http://chat-backend:3002
|
||||
PUBLIC_MANA_CORE_AUTH_URL: http://mana-core-auth:3001
|
||||
PUBLIC_BACKEND_URL_CLIENT: https://chat-api.mana.how
|
||||
PUBLIC_MANA_CORE_AUTH_URL_CLIENT: https://auth.mana.how
|
||||
ports:
|
||||
- "3000:3000"
|
||||
healthcheck:
|
||||
test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:3000/health"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
start_period: 40s
|
||||
networks:
|
||||
- manacore-network
|
||||
logging:
|
||||
driver: "json-file"
|
||||
options:
|
||||
max-size: "50m"
|
||||
max-file: "5"
|
||||
|
||||
# ============================================
|
||||
# Todo App
|
||||
# ============================================
|
||||
|
||||
todo-backend:
|
||||
image: ${DOCKER_REGISTRY:-ghcr.io/memo-2023}/todo-backend:${TODO_BACKEND_VERSION:-latest}
|
||||
container_name: todo-backend-prod
|
||||
restart: always
|
||||
depends_on:
|
||||
mana-core-auth:
|
||||
condition: service_healthy
|
||||
postgres:
|
||||
condition: service_healthy
|
||||
environment:
|
||||
NODE_ENV: production
|
||||
PORT: 3018
|
||||
DATABASE_URL: postgresql://${POSTGRES_USER:-postgres}:${POSTGRES_PASSWORD}@postgres:5432/todo
|
||||
MANA_CORE_AUTH_URL: http://mana-core-auth:3001
|
||||
CORS_ORIGINS: https://todo.mana.how,https://mana.how
|
||||
ports:
|
||||
- "3018:3018"
|
||||
healthcheck:
|
||||
test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:3018/api/health"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
start_period: 40s
|
||||
networks:
|
||||
- manacore-network
|
||||
logging:
|
||||
driver: "json-file"
|
||||
options:
|
||||
max-size: "50m"
|
||||
max-file: "5"
|
||||
|
||||
todo-web:
|
||||
image: ${DOCKER_REGISTRY:-ghcr.io/memo-2023}/todo-web:${TODO_WEB_VERSION:-latest}
|
||||
container_name: todo-web-prod
|
||||
restart: always
|
||||
depends_on:
|
||||
todo-backend:
|
||||
condition: service_healthy
|
||||
environment:
|
||||
NODE_ENV: production
|
||||
PORT: 5188
|
||||
PUBLIC_BACKEND_URL: http://todo-backend:3018
|
||||
PUBLIC_MANA_CORE_AUTH_URL: http://mana-core-auth:3001
|
||||
PUBLIC_BACKEND_URL_CLIENT: https://todo-api.mana.how
|
||||
PUBLIC_MANA_CORE_AUTH_URL_CLIENT: https://auth.mana.how
|
||||
ports:
|
||||
- "5188:5188"
|
||||
healthcheck:
|
||||
test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:5188/health"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
start_period: 40s
|
||||
networks:
|
||||
- manacore-network
|
||||
logging:
|
||||
driver: "json-file"
|
||||
options:
|
||||
max-size: "50m"
|
||||
max-file: "5"
|
||||
|
||||
# ============================================
|
||||
# Calendar App
|
||||
# ============================================
|
||||
|
||||
calendar-backend:
|
||||
image: ${DOCKER_REGISTRY:-ghcr.io/memo-2023}/calendar-backend:${CALENDAR_VERSION:-latest}
|
||||
container_name: calendar-backend-prod
|
||||
restart: always
|
||||
depends_on:
|
||||
mana-core-auth:
|
||||
condition: service_healthy
|
||||
postgres:
|
||||
condition: service_healthy
|
||||
environment:
|
||||
NODE_ENV: production
|
||||
PORT: 3016
|
||||
DATABASE_URL: postgresql://${POSTGRES_USER:-postgres}:${POSTGRES_PASSWORD}@postgres:5432/calendar
|
||||
DB_HOST: postgres
|
||||
DB_PORT: 5432
|
||||
DB_USER: ${POSTGRES_USER:-postgres}
|
||||
MANA_CORE_AUTH_URL: http://mana-core-auth:3001
|
||||
CORS_ORIGINS: https://calendar.mana.how,https://mana.how
|
||||
ports:
|
||||
- "3016:3016"
|
||||
healthcheck:
|
||||
test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:3016/api/v1/health"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
start_period: 40s
|
||||
networks:
|
||||
- manacore-network
|
||||
logging:
|
||||
driver: "json-file"
|
||||
options:
|
||||
max-size: "50m"
|
||||
max-file: "5"
|
||||
|
||||
calendar-web:
|
||||
image: ${DOCKER_REGISTRY:-ghcr.io/memo-2023}/calendar-web:${CALENDAR_WEB_VERSION:-latest}
|
||||
container_name: calendar-web-prod
|
||||
restart: always
|
||||
depends_on:
|
||||
calendar-backend:
|
||||
condition: service_healthy
|
||||
environment:
|
||||
NODE_ENV: production
|
||||
PORT: 5186
|
||||
PUBLIC_BACKEND_URL: http://calendar-backend:3016
|
||||
PUBLIC_MANA_CORE_AUTH_URL: http://mana-core-auth:3001
|
||||
PUBLIC_BACKEND_URL_CLIENT: https://calendar-api.mana.how
|
||||
PUBLIC_MANA_CORE_AUTH_URL_CLIENT: https://auth.mana.how
|
||||
ports:
|
||||
- "5186:5186"
|
||||
healthcheck:
|
||||
test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:5186/health"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
start_period: 40s
|
||||
networks:
|
||||
- manacore-network
|
||||
logging:
|
||||
driver: "json-file"
|
||||
options:
|
||||
max-size: "50m"
|
||||
max-file: "5"
|
||||
|
||||
# ============================================
|
||||
# Clock App
|
||||
# ============================================
|
||||
|
||||
clock-backend:
|
||||
image: ${DOCKER_REGISTRY:-ghcr.io/memo-2023}/clock-backend:${CLOCK_VERSION:-latest}
|
||||
container_name: clock-backend-prod
|
||||
restart: always
|
||||
depends_on:
|
||||
mana-core-auth:
|
||||
condition: service_healthy
|
||||
postgres:
|
||||
condition: service_healthy
|
||||
environment:
|
||||
NODE_ENV: production
|
||||
PORT: 3017
|
||||
DATABASE_URL: postgresql://${POSTGRES_USER:-postgres}:${POSTGRES_PASSWORD}@postgres:5432/clock
|
||||
DB_HOST: postgres
|
||||
DB_PORT: 5432
|
||||
DB_USER: ${POSTGRES_USER:-postgres}
|
||||
MANA_CORE_AUTH_URL: http://mana-core-auth:3001
|
||||
CORS_ORIGINS: https://clock.mana.how,https://mana.how
|
||||
ports:
|
||||
- "3017:3017"
|
||||
healthcheck:
|
||||
test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:3017/api/v1/health"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
start_period: 40s
|
||||
networks:
|
||||
- manacore-network
|
||||
logging:
|
||||
driver: "json-file"
|
||||
options:
|
||||
max-size: "50m"
|
||||
max-file: "5"
|
||||
|
||||
clock-web:
|
||||
image: ${DOCKER_REGISTRY:-ghcr.io/memo-2023}/clock-web:${CLOCK_WEB_VERSION:-latest}
|
||||
container_name: clock-web-prod
|
||||
restart: always
|
||||
depends_on:
|
||||
clock-backend:
|
||||
condition: service_healthy
|
||||
environment:
|
||||
NODE_ENV: production
|
||||
PORT: 5187
|
||||
PUBLIC_BACKEND_URL: http://clock-backend:3017
|
||||
PUBLIC_MANA_CORE_AUTH_URL: http://mana-core-auth:3001
|
||||
PUBLIC_BACKEND_URL_CLIENT: https://clock-api.mana.how
|
||||
PUBLIC_MANA_CORE_AUTH_URL_CLIENT: https://auth.mana.how
|
||||
ports:
|
||||
- "5187:5187"
|
||||
healthcheck:
|
||||
test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:5187/health"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
start_period: 40s
|
||||
networks:
|
||||
- manacore-network
|
||||
logging:
|
||||
driver: "json-file"
|
||||
options:
|
||||
max-size: "50m"
|
||||
max-file: "5"
|
||||
|
||||
# ============================================
|
||||
# Networks
|
||||
# ============================================
|
||||
|
||||
networks:
|
||||
manacore-network:
|
||||
driver: bridge
|
||||
name: manacore-production
|
||||
|
||||
# ============================================
|
||||
# Volumes
|
||||
# ============================================
|
||||
|
||||
volumes:
|
||||
postgres_data:
|
||||
name: manacore-postgres-prod
|
||||
redis_data:
|
||||
name: manacore-redis-prod
|
||||
|
|
@ -1,290 +0,0 @@
|
|||
# ARCHIVED: Full staging config with all services
|
||||
# Active simplified config: docker-compose.staging.yml
|
||||
#
|
||||
# Services included:
|
||||
# - postgres, redis (infrastructure)
|
||||
# - mana-core-auth, chat-backend, manadeck-backend (backends)
|
||||
# - nginx (reverse proxy)
|
||||
#
|
||||
# To restore: cp docker-compose.staging.full.yml docker-compose.staging.yml
|
||||
|
||||
services:
|
||||
# ============================================
|
||||
# Infrastructure Services
|
||||
# ============================================
|
||||
|
||||
postgres:
|
||||
image: postgres:16-alpine
|
||||
container_name: manacore-postgres-staging
|
||||
restart: unless-stopped
|
||||
environment:
|
||||
POSTGRES_DB: ${POSTGRES_DB:-manacore}
|
||||
POSTGRES_USER: ${POSTGRES_USER:-postgres}
|
||||
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
|
||||
volumes:
|
||||
- postgres_data:/var/lib/postgresql/data
|
||||
# init.sql removed - not needed for staging
|
||||
ports:
|
||||
- "5432:5432"
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-postgres}"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
networks:
|
||||
- manacore-network
|
||||
|
||||
redis:
|
||||
image: redis:7-alpine
|
||||
container_name: manacore-redis-staging
|
||||
restart: unless-stopped
|
||||
command: redis-server --requirepass ${REDIS_PASSWORD:-redis123}
|
||||
volumes:
|
||||
- redis_data:/data
|
||||
ports:
|
||||
- "6379:6379"
|
||||
healthcheck:
|
||||
test: ["CMD", "redis-cli", "--raw", "incr", "ping"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
networks:
|
||||
- manacore-network
|
||||
|
||||
# ============================================
|
||||
# Backend Services
|
||||
# ============================================
|
||||
|
||||
mana-core-auth:
|
||||
image: ${DOCKER_REGISTRY:-ghcr.io/memo-2023}/mana-core-auth:${AUTH_VERSION:-latest}
|
||||
container_name: mana-core-auth-staging
|
||||
restart: unless-stopped
|
||||
depends_on:
|
||||
postgres:
|
||||
condition: service_healthy
|
||||
redis:
|
||||
condition: service_healthy
|
||||
environment:
|
||||
NODE_ENV: staging
|
||||
PORT: 3001
|
||||
DATABASE_URL: postgresql://${POSTGRES_USER:-postgres}:${POSTGRES_PASSWORD}@postgres:5432/manacore_auth
|
||||
REDIS_HOST: redis
|
||||
REDIS_PORT: 6379
|
||||
REDIS_PASSWORD: ${REDIS_PASSWORD:-redis123}
|
||||
JWT_SECRET: ${JWT_SECRET}
|
||||
JWT_PUBLIC_KEY: ${JWT_PUBLIC_KEY}
|
||||
JWT_PRIVATE_KEY: ${JWT_PRIVATE_KEY}
|
||||
ports:
|
||||
- "3001:3001"
|
||||
healthcheck:
|
||||
test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:3001/api/v1/health"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
start_period: 40s
|
||||
networks:
|
||||
- manacore-network
|
||||
logging:
|
||||
driver: "json-file"
|
||||
options:
|
||||
max-size: "10m"
|
||||
max-file: "3"
|
||||
|
||||
# maerchenzauber-backend:
|
||||
# image: ${DOCKER_REGISTRY:-ghcr.io/memo-2023}/maerchenzauber-backend:${MAERCHENZAUBER_VERSION:-latest}
|
||||
# container_name: maerchenzauber-backend-staging
|
||||
# restart: unless-stopped
|
||||
# depends_on:
|
||||
# mana-core-auth:
|
||||
# condition: service_healthy
|
||||
# environment:
|
||||
# NODE_ENV: staging
|
||||
# PORT: 3002
|
||||
# MANA_SERVICE_URL: http://mana-core-auth:3001
|
||||
# SUPABASE_URL: ${SUPABASE_URL}
|
||||
# SUPABASE_ANON_KEY: ${SUPABASE_ANON_KEY}
|
||||
# SUPABASE_SERVICE_ROLE_KEY: ${SUPABASE_SERVICE_ROLE_KEY}
|
||||
# AZURE_OPENAI_ENDPOINT: ${AZURE_OPENAI_ENDPOINT}
|
||||
# AZURE_OPENAI_API_KEY: ${AZURE_OPENAI_API_KEY}
|
||||
# AZURE_OPENAI_API_VERSION: ${AZURE_OPENAI_API_VERSION:-2024-12-01-preview}
|
||||
# ports:
|
||||
# - "3002:3002"
|
||||
# healthcheck:
|
||||
# test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:3002/health"]
|
||||
# interval: 30s
|
||||
# timeout: 10s
|
||||
# retries: 3
|
||||
# networks:
|
||||
# - manacore-network
|
||||
# logging:
|
||||
# driver: "json-file"
|
||||
# options:
|
||||
# max-size: "10m"
|
||||
# max-file: "3"
|
||||
# # DISABLED: No Dockerfile exists yet
|
||||
|
||||
chat-backend:
|
||||
image: ${DOCKER_REGISTRY:-ghcr.io/memo-2023}/chat-backend:${CHAT_VERSION:-latest}
|
||||
container_name: chat-backend-staging
|
||||
restart: unless-stopped
|
||||
depends_on:
|
||||
mana-core-auth:
|
||||
condition: service_healthy
|
||||
postgres:
|
||||
condition: service_healthy
|
||||
environment:
|
||||
NODE_ENV: staging
|
||||
PORT: 3002
|
||||
DATABASE_URL: postgresql://${POSTGRES_USER:-postgres}:${POSTGRES_PASSWORD}@postgres:5432/chat
|
||||
MANA_SERVICE_URL: http://mana-core-auth:3001
|
||||
SUPABASE_URL: ${SUPABASE_URL}
|
||||
SUPABASE_SERVICE_KEY: ${SUPABASE_SERVICE_ROLE_KEY}
|
||||
AZURE_OPENAI_ENDPOINT: ${AZURE_OPENAI_ENDPOINT}
|
||||
AZURE_OPENAI_API_KEY: ${AZURE_OPENAI_API_KEY}
|
||||
AZURE_OPENAI_API_VERSION: ${AZURE_OPENAI_API_VERSION:-2024-12-01-preview}
|
||||
ports:
|
||||
- "3003:3002"
|
||||
healthcheck:
|
||||
test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:3002/api/health"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
networks:
|
||||
- manacore-network
|
||||
logging:
|
||||
driver: "json-file"
|
||||
options:
|
||||
max-size: "10m"
|
||||
max-file: "3"
|
||||
|
||||
manadeck-backend:
|
||||
image: ${DOCKER_REGISTRY:-ghcr.io/memo-2023}/manadeck-backend:${MANADECK_VERSION:-latest}
|
||||
container_name: manadeck-backend-staging
|
||||
restart: unless-stopped
|
||||
depends_on:
|
||||
mana-core-auth:
|
||||
condition: service_healthy
|
||||
postgres:
|
||||
condition: service_healthy
|
||||
environment:
|
||||
NODE_ENV: staging
|
||||
PORT: 3003
|
||||
DATABASE_URL: postgresql://${POSTGRES_USER:-postgres}:${POSTGRES_PASSWORD}@postgres:5432/manadeck
|
||||
MANA_SERVICE_URL: http://mana-core-auth:3001
|
||||
SUPABASE_URL: ${SUPABASE_URL}
|
||||
SUPABASE_SERVICE_KEY: ${SUPABASE_SERVICE_ROLE_KEY}
|
||||
ports:
|
||||
- "3004:3003"
|
||||
healthcheck:
|
||||
test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:3003/health"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
networks:
|
||||
- manacore-network
|
||||
logging:
|
||||
driver: "json-file"
|
||||
options:
|
||||
max-size: "10m"
|
||||
max-file: "3"
|
||||
|
||||
# nutriphi-backend:
|
||||
# image: ${DOCKER_REGISTRY:-ghcr.io/memo-2023}/nutriphi-backend:${NUTRIPHI_VERSION:-latest}
|
||||
# container_name: nutriphi-backend-staging
|
||||
# restart: unless-stopped
|
||||
# depends_on:
|
||||
# mana-core-auth:
|
||||
# condition: service_healthy
|
||||
# environment:
|
||||
# NODE_ENV: staging
|
||||
# PORT: 3004
|
||||
# MANA_SERVICE_URL: http://mana-core-auth:3001
|
||||
# SUPABASE_URL: ${SUPABASE_URL}
|
||||
# SUPABASE_SERVICE_KEY: ${SUPABASE_SERVICE_ROLE_KEY}
|
||||
# ports:
|
||||
# - "3005:3004"
|
||||
# healthcheck:
|
||||
# test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:3004/health"]
|
||||
# interval: 30s
|
||||
# timeout: 10s
|
||||
# retries: 3
|
||||
# networks:
|
||||
# - manacore-network
|
||||
# logging:
|
||||
# driver: "json-file"
|
||||
# options:
|
||||
# max-size: "10m"
|
||||
# max-file: "3"
|
||||
# # DISABLED: No Dockerfile exists yet
|
||||
|
||||
# news-api:
|
||||
# image: ${DOCKER_REGISTRY:-ghcr.io/memo-2023}/news-api:${NEWS_VERSION:-latest}
|
||||
# container_name: news-api-staging
|
||||
# restart: unless-stopped
|
||||
# depends_on:
|
||||
# mana-core-auth:
|
||||
# condition: service_healthy
|
||||
# environment:
|
||||
# NODE_ENV: staging
|
||||
# PORT: 3005
|
||||
# MANA_SERVICE_URL: http://mana-core-auth:3001
|
||||
# ports:
|
||||
# - "3006:3005"
|
||||
# healthcheck:
|
||||
# test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:3005/health"]
|
||||
# interval: 30s
|
||||
# timeout: 10s
|
||||
# retries: 3
|
||||
# networks:
|
||||
# - manacore-network
|
||||
# logging:
|
||||
# driver: "json-file"
|
||||
# options:
|
||||
# max-size: "10m"
|
||||
# max-file: "3"
|
||||
# # DISABLED: No Dockerfile exists yet
|
||||
|
||||
# ============================================
|
||||
# Reverse Proxy (Optional)
|
||||
# ============================================
|
||||
|
||||
nginx:
|
||||
image: nginx:alpine
|
||||
container_name: manacore-nginx-staging
|
||||
restart: unless-stopped
|
||||
depends_on:
|
||||
- mana-core-auth
|
||||
- chat-backend
|
||||
- manadeck-backend
|
||||
volumes:
|
||||
- ./docker/nginx/staging.conf:/etc/nginx/conf.d/default.conf
|
||||
- ./docker/nginx/ssl:/etc/nginx/ssl
|
||||
ports:
|
||||
- "80:80"
|
||||
- "443:443"
|
||||
networks:
|
||||
- manacore-network
|
||||
logging:
|
||||
driver: "json-file"
|
||||
options:
|
||||
max-size: "10m"
|
||||
max-file: "3"
|
||||
|
||||
# ============================================
|
||||
# Networks
|
||||
# ============================================
|
||||
|
||||
networks:
|
||||
manacore-network:
|
||||
driver: bridge
|
||||
name: manacore-staging
|
||||
|
||||
# ============================================
|
||||
# Volumes
|
||||
# ============================================
|
||||
|
||||
volumes:
|
||||
postgres_data:
|
||||
name: manacore-postgres-staging
|
||||
redis_data:
|
||||
name: manacore-redis-staging
|
||||
|
|
@ -1,421 +0,0 @@
|
|||
# Simplified staging config: mana-core-auth + chat (backend + web)
|
||||
# Full config archived at: docker-compose.staging.full.yml
|
||||
#
|
||||
# To restore full config:
|
||||
# cp docker-compose.staging.full.yml docker-compose.staging.yml
|
||||
#
|
||||
# To add more services back:
|
||||
# 1. Copy the service block from docker-compose.staging.full.yml
|
||||
# 2. Add corresponding health check in .github/workflows/cd-staging.yml
|
||||
# 3. Add service to workflow_dispatch options in cd-staging.yml
|
||||
|
||||
services:
|
||||
# ============================================
|
||||
# Infrastructure Services
|
||||
# ============================================
|
||||
|
||||
postgres:
|
||||
image: postgres:16-alpine
|
||||
container_name: manacore-postgres-staging
|
||||
restart: unless-stopped
|
||||
environment:
|
||||
POSTGRES_DB: ${POSTGRES_DB:-manacore}
|
||||
POSTGRES_USER: ${POSTGRES_USER:-postgres}
|
||||
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
|
||||
volumes:
|
||||
- postgres_data:/var/lib/postgresql/data
|
||||
ports:
|
||||
- "5432:5432"
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-postgres}"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
networks:
|
||||
- manacore-network
|
||||
|
||||
redis:
|
||||
image: redis:7-alpine
|
||||
container_name: manacore-redis-staging
|
||||
restart: unless-stopped
|
||||
command: redis-server --requirepass ${REDIS_PASSWORD:-redis123}
|
||||
volumes:
|
||||
- redis_data:/data
|
||||
ports:
|
||||
- "6379:6379"
|
||||
healthcheck:
|
||||
test: ["CMD", "redis-cli", "--raw", "incr", "ping"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
networks:
|
||||
- manacore-network
|
||||
|
||||
# ============================================
|
||||
# Backend Services
|
||||
# ============================================
|
||||
|
||||
mana-core-auth:
|
||||
image: ${DOCKER_REGISTRY:-ghcr.io/memo-2023}/mana-core-auth:${AUTH_VERSION:-latest}
|
||||
container_name: mana-core-auth-staging
|
||||
restart: unless-stopped
|
||||
depends_on:
|
||||
postgres:
|
||||
condition: service_healthy
|
||||
redis:
|
||||
condition: service_healthy
|
||||
environment:
|
||||
NODE_ENV: staging
|
||||
PORT: 3001
|
||||
DATABASE_URL: postgresql://${POSTGRES_USER:-postgres}:${POSTGRES_PASSWORD}@postgres:5432/manacore_auth
|
||||
REDIS_HOST: redis
|
||||
REDIS_PORT: 6379
|
||||
REDIS_PASSWORD: ${REDIS_PASSWORD:-redis123}
|
||||
JWT_SECRET: ${JWT_SECRET}
|
||||
JWT_PUBLIC_KEY: ${JWT_PUBLIC_KEY}
|
||||
JWT_PRIVATE_KEY: ${JWT_PRIVATE_KEY}
|
||||
# CORS - Allow all staging web app origins (HTTPS domains + localhost for dev)
|
||||
CORS_ORIGINS: https://chat.staging.manacore.ai,https://staging.manacore.ai,https://calendar.staging.manacore.ai,https://clock.staging.manacore.ai,https://todo.staging.manacore.ai,http://localhost:3000,http://localhost:5173,http://localhost:5186,http://localhost:5187,http://localhost:5188
|
||||
ports:
|
||||
- "3001:3001"
|
||||
healthcheck:
|
||||
test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:3001/api/v1/health"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
start_period: 40s
|
||||
networks:
|
||||
- manacore-network
|
||||
logging:
|
||||
driver: "json-file"
|
||||
options:
|
||||
max-size: "10m"
|
||||
max-file: "3"
|
||||
|
||||
chat-backend:
|
||||
image: ${DOCKER_REGISTRY:-ghcr.io/memo-2023}/chat-backend:${CHAT_VERSION:-latest}
|
||||
container_name: chat-backend-staging
|
||||
restart: unless-stopped
|
||||
depends_on:
|
||||
mana-core-auth:
|
||||
condition: service_healthy
|
||||
postgres:
|
||||
condition: service_healthy
|
||||
environment:
|
||||
NODE_ENV: staging
|
||||
PORT: 3002
|
||||
DATABASE_URL: postgresql://${POSTGRES_USER:-postgres}:${POSTGRES_PASSWORD}@postgres:5432/chat
|
||||
MANA_CORE_AUTH_URL: http://mana-core-auth:3001
|
||||
SUPABASE_URL: ${SUPABASE_URL}
|
||||
SUPABASE_SERVICE_KEY: ${SUPABASE_SERVICE_ROLE_KEY}
|
||||
AZURE_OPENAI_ENDPOINT: ${AZURE_OPENAI_ENDPOINT}
|
||||
AZURE_OPENAI_API_KEY: ${AZURE_OPENAI_API_KEY}
|
||||
AZURE_OPENAI_API_VERSION: ${AZURE_OPENAI_API_VERSION:-2024-12-01-preview}
|
||||
ports:
|
||||
- "3002:3002"
|
||||
healthcheck:
|
||||
test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:3002/api/v1/health"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
start_period: 40s
|
||||
networks:
|
||||
- manacore-network
|
||||
logging:
|
||||
driver: "json-file"
|
||||
options:
|
||||
max-size: "10m"
|
||||
max-file: "3"
|
||||
|
||||
chat-web:
|
||||
image: ${DOCKER_REGISTRY:-ghcr.io/memo-2023}/chat-web:${CHAT_WEB_VERSION:-latest}
|
||||
container_name: chat-web-staging
|
||||
restart: unless-stopped
|
||||
depends_on:
|
||||
chat-backend:
|
||||
condition: service_healthy
|
||||
environment:
|
||||
NODE_ENV: staging
|
||||
PORT: 3000
|
||||
# Server-side URLs (Docker internal network)
|
||||
PUBLIC_BACKEND_URL: http://chat-backend:3002
|
||||
PUBLIC_MANA_CORE_AUTH_URL: http://mana-core-auth:3001
|
||||
# Client-side URLs (browser access via HTTPS staging domains)
|
||||
PUBLIC_BACKEND_URL_CLIENT: https://chat-api.staging.manacore.ai
|
||||
PUBLIC_MANA_CORE_AUTH_URL_CLIENT: https://auth.staging.manacore.ai
|
||||
ports:
|
||||
- "3000:3000"
|
||||
healthcheck:
|
||||
test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:3000/health"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
start_period: 40s
|
||||
networks:
|
||||
- manacore-network
|
||||
logging:
|
||||
driver: "json-file"
|
||||
options:
|
||||
max-size: "10m"
|
||||
max-file: "3"
|
||||
|
||||
# ============================================
|
||||
# Manacore App
|
||||
# ============================================
|
||||
|
||||
manacore-web:
|
||||
image: ${DOCKER_REGISTRY:-ghcr.io/memo-2023}/manacore-web:${MANACORE_WEB_VERSION:-latest}
|
||||
container_name: manacore-web-staging
|
||||
restart: unless-stopped
|
||||
depends_on:
|
||||
mana-core-auth:
|
||||
condition: service_healthy
|
||||
environment:
|
||||
NODE_ENV: staging
|
||||
PORT: 5173
|
||||
# Auth URLs
|
||||
PUBLIC_MANA_CORE_AUTH_URL: http://mana-core-auth:3001
|
||||
PUBLIC_MANA_CORE_AUTH_URL_CLIENT: https://auth.staging.manacore.ai
|
||||
# Backend URLs for dashboard widgets
|
||||
PUBLIC_TODO_API_URL: http://todo-backend:3018
|
||||
PUBLIC_TODO_API_URL_CLIENT: https://todo-api.staging.manacore.ai
|
||||
PUBLIC_CALENDAR_API_URL: http://calendar-backend:3016
|
||||
PUBLIC_CALENDAR_API_URL_CLIENT: https://calendar-api.staging.manacore.ai
|
||||
PUBLIC_CLOCK_API_URL: http://clock-backend:3017
|
||||
PUBLIC_CLOCK_API_URL_CLIENT: https://clock-api.staging.manacore.ai
|
||||
ports:
|
||||
- "5173:5173"
|
||||
healthcheck:
|
||||
test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:5173/health"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
start_period: 40s
|
||||
networks:
|
||||
- manacore-network
|
||||
logging:
|
||||
driver: "json-file"
|
||||
options:
|
||||
max-size: "10m"
|
||||
max-file: "3"
|
||||
|
||||
# ============================================
|
||||
# Todo App
|
||||
# ============================================
|
||||
|
||||
todo-backend:
|
||||
image: ${DOCKER_REGISTRY:-ghcr.io/memo-2023}/todo-backend:${TODO_BACKEND_VERSION:-latest}
|
||||
container_name: todo-backend-staging
|
||||
restart: unless-stopped
|
||||
depends_on:
|
||||
mana-core-auth:
|
||||
condition: service_healthy
|
||||
postgres:
|
||||
condition: service_healthy
|
||||
environment:
|
||||
NODE_ENV: staging
|
||||
PORT: 3018
|
||||
DATABASE_URL: postgresql://${POSTGRES_USER:-postgres}:${POSTGRES_PASSWORD}@postgres:5432/todo
|
||||
MANA_CORE_AUTH_URL: http://mana-core-auth:3001
|
||||
ports:
|
||||
- "3018:3018"
|
||||
healthcheck:
|
||||
test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:3018/api/health"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
start_period: 40s
|
||||
networks:
|
||||
- manacore-network
|
||||
logging:
|
||||
driver: "json-file"
|
||||
options:
|
||||
max-size: "10m"
|
||||
max-file: "3"
|
||||
|
||||
todo-web:
|
||||
image: ${DOCKER_REGISTRY:-ghcr.io/memo-2023}/todo-web:${TODO_WEB_VERSION:-latest}
|
||||
container_name: todo-web-staging
|
||||
restart: unless-stopped
|
||||
depends_on:
|
||||
todo-backend:
|
||||
condition: service_healthy
|
||||
environment:
|
||||
NODE_ENV: staging
|
||||
PORT: 5188
|
||||
# Server-side URLs (Docker internal network)
|
||||
PUBLIC_BACKEND_URL: http://todo-backend:3018
|
||||
PUBLIC_MANA_CORE_AUTH_URL: http://mana-core-auth:3001
|
||||
# Client-side URLs (browser access via public IP)
|
||||
PUBLIC_BACKEND_URL_CLIENT: http://46.224.108.214:3018
|
||||
PUBLIC_MANA_CORE_AUTH_URL_CLIENT: http://46.224.108.214:3001
|
||||
ports:
|
||||
- "5188:5188"
|
||||
healthcheck:
|
||||
test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:5188/health"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
start_period: 40s
|
||||
networks:
|
||||
- manacore-network
|
||||
logging:
|
||||
driver: "json-file"
|
||||
options:
|
||||
max-size: "10m"
|
||||
max-file: "3"
|
||||
|
||||
# ============================================
|
||||
# Calendar App
|
||||
# ============================================
|
||||
|
||||
calendar-backend:
|
||||
image: ${DOCKER_REGISTRY:-ghcr.io/memo-2023}/calendar-backend:${CALENDAR_VERSION:-latest}
|
||||
container_name: calendar-backend-staging
|
||||
restart: unless-stopped
|
||||
depends_on:
|
||||
mana-core-auth:
|
||||
condition: service_healthy
|
||||
postgres:
|
||||
condition: service_healthy
|
||||
environment:
|
||||
NODE_ENV: staging
|
||||
PORT: 3016
|
||||
DATABASE_URL: postgresql://${POSTGRES_USER:-postgres}:${POSTGRES_PASSWORD}@postgres:5432/calendar
|
||||
DB_HOST: postgres
|
||||
DB_PORT: 5432
|
||||
DB_USER: ${POSTGRES_USER:-postgres}
|
||||
MANA_CORE_AUTH_URL: http://mana-core-auth:3001
|
||||
CORS_ORIGINS: https://calendar.staging.manacore.ai,https://staging.manacore.ai,http://localhost:5186,http://localhost:5173
|
||||
ports:
|
||||
- "3016:3016"
|
||||
healthcheck:
|
||||
test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:3016/api/v1/health"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
start_period: 40s
|
||||
networks:
|
||||
- manacore-network
|
||||
logging:
|
||||
driver: "json-file"
|
||||
options:
|
||||
max-size: "10m"
|
||||
max-file: "3"
|
||||
|
||||
calendar-web:
|
||||
image: ${DOCKER_REGISTRY:-ghcr.io/memo-2023}/calendar-web:${CALENDAR_WEB_VERSION:-latest}
|
||||
container_name: calendar-web-staging
|
||||
restart: unless-stopped
|
||||
depends_on:
|
||||
calendar-backend:
|
||||
condition: service_healthy
|
||||
environment:
|
||||
NODE_ENV: staging
|
||||
PORT: 5186
|
||||
PUBLIC_BACKEND_URL: http://calendar-backend:3016
|
||||
PUBLIC_MANA_CORE_AUTH_URL: http://mana-core-auth:3001
|
||||
PUBLIC_BACKEND_URL_CLIENT: https://calendar-api.staging.manacore.ai
|
||||
PUBLIC_MANA_CORE_AUTH_URL_CLIENT: https://auth.staging.manacore.ai
|
||||
ports:
|
||||
- "5186:5186"
|
||||
healthcheck:
|
||||
test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:5186/health"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
start_period: 40s
|
||||
networks:
|
||||
- manacore-network
|
||||
logging:
|
||||
driver: "json-file"
|
||||
options:
|
||||
max-size: "10m"
|
||||
max-file: "3"
|
||||
|
||||
# ============================================
|
||||
# Clock App
|
||||
# ============================================
|
||||
|
||||
clock-backend:
|
||||
image: ${DOCKER_REGISTRY:-ghcr.io/memo-2023}/clock-backend:${CLOCK_VERSION:-latest}
|
||||
container_name: clock-backend-staging
|
||||
restart: unless-stopped
|
||||
depends_on:
|
||||
mana-core-auth:
|
||||
condition: service_healthy
|
||||
postgres:
|
||||
condition: service_healthy
|
||||
environment:
|
||||
NODE_ENV: staging
|
||||
PORT: 3017
|
||||
DATABASE_URL: postgresql://${POSTGRES_USER:-postgres}:${POSTGRES_PASSWORD}@postgres:5432/clock
|
||||
DB_HOST: postgres
|
||||
DB_PORT: 5432
|
||||
DB_USER: ${POSTGRES_USER:-postgres}
|
||||
MANA_CORE_AUTH_URL: http://mana-core-auth:3001
|
||||
CORS_ORIGINS: https://clock.staging.manacore.ai,https://staging.manacore.ai,http://localhost:5187,http://localhost:5173
|
||||
ports:
|
||||
- "3017:3017"
|
||||
healthcheck:
|
||||
test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:3017/api/v1/health"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
start_period: 40s
|
||||
networks:
|
||||
- manacore-network
|
||||
logging:
|
||||
driver: "json-file"
|
||||
options:
|
||||
max-size: "10m"
|
||||
max-file: "3"
|
||||
|
||||
clock-web:
|
||||
image: ${DOCKER_REGISTRY:-ghcr.io/memo-2023}/clock-web:${CLOCK_WEB_VERSION:-latest}
|
||||
container_name: clock-web-staging
|
||||
restart: unless-stopped
|
||||
depends_on:
|
||||
clock-backend:
|
||||
condition: service_healthy
|
||||
environment:
|
||||
NODE_ENV: staging
|
||||
PORT: 5187
|
||||
PUBLIC_BACKEND_URL: http://clock-backend:3017
|
||||
PUBLIC_MANA_CORE_AUTH_URL: http://mana-core-auth:3001
|
||||
PUBLIC_BACKEND_URL_CLIENT: https://clock-api.staging.manacore.ai
|
||||
PUBLIC_MANA_CORE_AUTH_URL_CLIENT: https://auth.staging.manacore.ai
|
||||
ports:
|
||||
- "5187:5187"
|
||||
healthcheck:
|
||||
test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:5187/health"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
start_period: 40s
|
||||
networks:
|
||||
- manacore-network
|
||||
logging:
|
||||
driver: "json-file"
|
||||
options:
|
||||
max-size: "10m"
|
||||
max-file: "3"
|
||||
|
||||
# ============================================
|
||||
# Networks
|
||||
# ============================================
|
||||
|
||||
networks:
|
||||
manacore-network:
|
||||
driver: bridge
|
||||
name: manacore-staging
|
||||
|
||||
# ============================================
|
||||
# Volumes
|
||||
# ============================================
|
||||
|
||||
volumes:
|
||||
postgres_data:
|
||||
name: manacore-postgres-staging
|
||||
redis_data:
|
||||
name: manacore-redis-staging
|
||||
|
|
@ -1,522 +0,0 @@
|
|||
# CI/CD Setup Guide
|
||||
|
||||
Step-by-step guide to configure the CI/CD pipeline for the manacore-monorepo.
|
||||
|
||||
## Quick Start
|
||||
|
||||
1. [Configure GitHub Secrets](#github-secrets)
|
||||
2. [Set Up Docker Registry](#docker-registry)
|
||||
3. [Configure Deployment Servers](#deployment-servers)
|
||||
4. [Enable GitHub Actions](#enable-github-actions)
|
||||
5. [Test the Pipeline](#test-the-pipeline)
|
||||
|
||||
## GitHub Secrets
|
||||
|
||||
### Navigate to Secrets
|
||||
|
||||
1. Go to your GitHub repository
|
||||
2. Click `Settings` > `Secrets and variables` > `Actions`
|
||||
3. Click `New repository secret`
|
||||
|
||||
### Required Secrets
|
||||
|
||||
#### Docker Registry (3 secrets)
|
||||
|
||||
```
|
||||
DOCKER_USERNAME=your-docker-hub-username
|
||||
DOCKER_PASSWORD=your-docker-hub-password-or-token
|
||||
DOCKER_REGISTRY=wuesteon
|
||||
```
|
||||
|
||||
**How to get Docker credentials**:
|
||||
1. Create account at https://hub.docker.com
|
||||
2. Go to Account Settings > Security
|
||||
3. Create Access Token
|
||||
4. Use token as DOCKER_PASSWORD
|
||||
|
||||
#### SSH Keys (2 secrets per environment)
|
||||
|
||||
Generate SSH keys:
|
||||
```bash
|
||||
# Generate new key pair
|
||||
ssh-keygen -t ed25519 -C "github-actions-staging" -f ~/.ssh/github-actions-staging
|
||||
|
||||
# Display private key (copy this to GitHub secret)
|
||||
cat ~/.ssh/github-actions-staging
|
||||
|
||||
# Display public key (add this to server)
|
||||
cat ~/.ssh/github-actions-staging.pub
|
||||
```
|
||||
|
||||
Add to GitHub:
|
||||
```
|
||||
STAGING_SSH_KEY=<private-key-content>
|
||||
PRODUCTION_SSH_KEY=<private-key-content>
|
||||
```
|
||||
|
||||
#### Server Access (2 secrets per environment)
|
||||
|
||||
```
|
||||
STAGING_HOST=staging.manacore.app
|
||||
STAGING_USER=deploy
|
||||
PRODUCTION_HOST=api.manacore.app
|
||||
PRODUCTION_USER=deploy
|
||||
```
|
||||
|
||||
#### Database Configuration (Staging)
|
||||
|
||||
```
|
||||
STAGING_POSTGRES_HOST=postgres
|
||||
STAGING_POSTGRES_PORT=5432
|
||||
STAGING_POSTGRES_DB=manacore
|
||||
STAGING_POSTGRES_USER=postgres
|
||||
STAGING_POSTGRES_PASSWORD=<generate-secure-password>
|
||||
```
|
||||
|
||||
Generate secure password:
|
||||
```bash
|
||||
openssl rand -base64 32
|
||||
```
|
||||
|
||||
#### Redis Configuration (Staging)
|
||||
|
||||
```
|
||||
STAGING_REDIS_HOST=redis
|
||||
STAGING_REDIS_PORT=6379
|
||||
STAGING_REDIS_PASSWORD=<generate-secure-password>
|
||||
```
|
||||
|
||||
#### Supabase Configuration (Staging)
|
||||
|
||||
```
|
||||
STAGING_SUPABASE_URL=https://xxxxx.supabase.co
|
||||
STAGING_SUPABASE_ANON_KEY=<your-anon-key>
|
||||
STAGING_SUPABASE_SERVICE_ROLE_KEY=<your-service-role-key>
|
||||
```
|
||||
|
||||
**How to get Supabase credentials**:
|
||||
1. Go to https://supabase.com
|
||||
2. Open your project
|
||||
3. Go to Project Settings > API
|
||||
4. Copy `URL`, `anon public`, and `service_role` keys
|
||||
|
||||
#### Azure OpenAI Configuration (Staging)
|
||||
|
||||
```
|
||||
STAGING_AZURE_OPENAI_ENDPOINT=https://xxxxx.openai.azure.com
|
||||
STAGING_AZURE_OPENAI_API_KEY=<your-api-key>
|
||||
STAGING_AZURE_OPENAI_API_VERSION=2024-12-01-preview
|
||||
```
|
||||
|
||||
#### JWT Configuration (Staging)
|
||||
|
||||
Generate JWT keys:
|
||||
```bash
|
||||
# Generate private key
|
||||
openssl genrsa -out jwt-private.pem 2048
|
||||
|
||||
# Extract public key
|
||||
openssl rsa -in jwt-private.pem -pubout -out jwt-public.pem
|
||||
|
||||
# Generate secret
|
||||
openssl rand -hex 32
|
||||
|
||||
# View private key (copy to STAGING_JWT_PRIVATE_KEY)
|
||||
cat jwt-private.pem
|
||||
|
||||
# View public key (copy to STAGING_JWT_PUBLIC_KEY)
|
||||
cat jwt-public.pem
|
||||
```
|
||||
|
||||
Add to GitHub:
|
||||
```
|
||||
STAGING_JWT_SECRET=<hex-secret>
|
||||
STAGING_JWT_PUBLIC_KEY=<public-key-content>
|
||||
STAGING_JWT_PRIVATE_KEY=<private-key-content>
|
||||
```
|
||||
|
||||
#### Production Secrets
|
||||
|
||||
Repeat all the above for production with `PRODUCTION_` prefix.
|
||||
|
||||
**Important**: Use different values for production! Never reuse staging credentials.
|
||||
|
||||
#### Optional: Turbo Cache
|
||||
|
||||
For faster builds with remote caching:
|
||||
|
||||
```
|
||||
TURBO_TOKEN=<vercel-token>
|
||||
TURBO_TEAM=<team-name>
|
||||
```
|
||||
|
||||
Get these from https://vercel.com
|
||||
|
||||
#### Optional: Code Coverage
|
||||
|
||||
```
|
||||
CODECOV_TOKEN=<codecov-token>
|
||||
```
|
||||
|
||||
Get from https://codecov.io
|
||||
|
||||
## Docker Registry
|
||||
|
||||
### Option 1: Docker Hub (Recommended)
|
||||
|
||||
1. Sign up at https://hub.docker.com
|
||||
2. Create access token (Account Settings > Security)
|
||||
3. Add credentials to GitHub secrets
|
||||
4. Create repository for each service:
|
||||
- `wuesteon/mana-core-auth`
|
||||
- `wuesteon/chat-backend`
|
||||
- `wuesteon/maerchenzauber-backend`
|
||||
- etc.
|
||||
|
||||
### Option 2: GitHub Container Registry
|
||||
|
||||
```yaml
|
||||
# In .github/workflows/ci-main.yml, change:
|
||||
- name: Login to Docker Hub
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
registry: ghcr.io
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
# Change image names to:
|
||||
ghcr.io/${{ github.repository_owner }}/service-name
|
||||
```
|
||||
|
||||
### Option 3: Private Registry
|
||||
|
||||
Update workflows to use your registry URL:
|
||||
```
|
||||
registry: registry.example.com
|
||||
```
|
||||
|
||||
## Deployment Servers
|
||||
|
||||
### Server Requirements
|
||||
|
||||
- **OS**: Ubuntu 20.04+ or Debian 11+
|
||||
- **RAM**: 4GB minimum, 8GB recommended
|
||||
- **Storage**: 50GB minimum, 100GB recommended
|
||||
- **CPU**: 2 cores minimum, 4 cores recommended
|
||||
|
||||
### Server Setup
|
||||
|
||||
#### 1. Create Deploy User
|
||||
|
||||
```bash
|
||||
# On server
|
||||
sudo adduser deploy
|
||||
sudo usermod -aG docker deploy
|
||||
sudo su - deploy
|
||||
```
|
||||
|
||||
#### 2. Install Docker
|
||||
|
||||
```bash
|
||||
# Update system
|
||||
sudo apt update && sudo apt upgrade -y
|
||||
|
||||
# Install Docker
|
||||
curl -fsSL https://get.docker.com -o get-docker.sh
|
||||
sudo sh get-docker.sh
|
||||
|
||||
# Install Docker Compose
|
||||
sudo apt install docker-compose-plugin
|
||||
|
||||
# Verify installation
|
||||
docker --version
|
||||
docker compose version
|
||||
```
|
||||
|
||||
#### 3. Configure SSH Access
|
||||
|
||||
```bash
|
||||
# On server, as deploy user
|
||||
mkdir -p ~/.ssh
|
||||
chmod 700 ~/.ssh
|
||||
|
||||
# Add GitHub Actions public key to authorized_keys
|
||||
echo "ssh-ed25519 AAAAC3... github-actions-staging" >> ~/.ssh/authorized_keys
|
||||
chmod 600 ~/.ssh/authorized_keys
|
||||
```
|
||||
|
||||
#### 4. Test SSH Access
|
||||
|
||||
```bash
|
||||
# From your local machine
|
||||
ssh -i ~/.ssh/github-actions-staging deploy@staging.manacore.app
|
||||
|
||||
# Should login without password prompt
|
||||
```
|
||||
|
||||
#### 5. Create Deployment Directories
|
||||
|
||||
```bash
|
||||
# On server
|
||||
mkdir -p ~/manacore-staging
|
||||
mkdir -p ~/manacore-staging/logs
|
||||
mkdir -p ~/manacore-staging/backups
|
||||
|
||||
# Or for production
|
||||
mkdir -p ~/manacore-production
|
||||
mkdir -p ~/manacore-production/logs
|
||||
mkdir -p ~/manacore-production/backups
|
||||
```
|
||||
|
||||
#### 6. Configure Firewall
|
||||
|
||||
```bash
|
||||
# Allow SSH
|
||||
sudo ufw allow 22/tcp
|
||||
|
||||
# Allow HTTP/HTTPS
|
||||
sudo ufw allow 80/tcp
|
||||
sudo ufw allow 443/tcp
|
||||
|
||||
# Allow specific service ports (optional, if not using reverse proxy)
|
||||
sudo ufw allow 3001/tcp # Mana Core Auth
|
||||
sudo ufw allow 3002/tcp # Maerchenzauber Backend
|
||||
|
||||
# Enable firewall
|
||||
sudo ufw enable
|
||||
```
|
||||
|
||||
#### 7. Set Up Reverse Proxy (Optional)
|
||||
|
||||
If using Nginx as reverse proxy:
|
||||
|
||||
```bash
|
||||
sudo apt install nginx
|
||||
|
||||
# Create configuration
|
||||
sudo nano /etc/nginx/sites-available/manacore
|
||||
```
|
||||
|
||||
```nginx
|
||||
server {
|
||||
listen 80;
|
||||
server_name api.manacore.app;
|
||||
|
||||
location /api/v1/ {
|
||||
proxy_pass http://localhost:3001;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
}
|
||||
|
||||
location /health {
|
||||
proxy_pass http://localhost:3002;
|
||||
proxy_set_header Host $host;
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
```bash
|
||||
# Enable site
|
||||
sudo ln -s /etc/nginx/sites-available/manacore /etc/nginx/sites-enabled/
|
||||
sudo nginx -t
|
||||
sudo systemctl reload nginx
|
||||
```
|
||||
|
||||
## GitHub Environments
|
||||
|
||||
### Create Environments
|
||||
|
||||
1. Go to repository Settings > Environments
|
||||
2. Create two environments:
|
||||
- `staging`
|
||||
- `production-approval`
|
||||
|
||||
### Configure Production Approval
|
||||
|
||||
1. Go to `production-approval` environment
|
||||
2. Add required reviewers
|
||||
3. Set wait timer (optional): 5 minutes
|
||||
4. Add environment secrets (if any differ from repository secrets)
|
||||
|
||||
## Enable GitHub Actions
|
||||
|
||||
### 1. Check Workflow Permissions
|
||||
|
||||
1. Go to Settings > Actions > General
|
||||
2. Scroll to "Workflow permissions"
|
||||
3. Select "Read and write permissions"
|
||||
4. Check "Allow GitHub Actions to create and approve pull requests"
|
||||
5. Click Save
|
||||
|
||||
### 2. Enable Workflows
|
||||
|
||||
Workflows are automatically enabled when files are pushed to `.github/workflows/`
|
||||
|
||||
### 3. Configure Branch Protection
|
||||
|
||||
1. Go to Settings > Branches
|
||||
2. Add rule for `main` branch:
|
||||
- ✅ Require status checks to pass
|
||||
- Select: `All PR Checks Complete`
|
||||
- ✅ Require branches to be up to date
|
||||
- ✅ Require conversation resolution
|
||||
- ✅ Do not allow bypassing
|
||||
|
||||
## Test the Pipeline
|
||||
|
||||
### 1. Test PR Workflow
|
||||
|
||||
```bash
|
||||
# Create test branch
|
||||
git checkout -b test/ci-pipeline
|
||||
|
||||
# Make a small change
|
||||
echo "# CI/CD Test" >> README.md
|
||||
|
||||
# Commit and push
|
||||
git add README.md
|
||||
git commit -m "test: verify CI pipeline"
|
||||
git push origin test/ci-pipeline
|
||||
|
||||
# Create PR on GitHub
|
||||
# Watch GitHub Actions tab for workflow execution
|
||||
```
|
||||
|
||||
**Expected Results**:
|
||||
- ✅ Detect changed files
|
||||
- ✅ Format check passes
|
||||
- ✅ Type check passes
|
||||
- ✅ Build completes
|
||||
- ✅ Tests run
|
||||
|
||||
### 2. Test Main Branch Workflow
|
||||
|
||||
```bash
|
||||
# Merge the PR
|
||||
# Watch GitHub Actions for:
|
||||
```
|
||||
|
||||
**Expected Results**:
|
||||
- ✅ Full validation passes
|
||||
- ✅ Docker images built
|
||||
- ✅ Images pushed to registry
|
||||
- ✅ Staging deployment triggered
|
||||
|
||||
### 3. Test Staging Deployment
|
||||
|
||||
Check staging server:
|
||||
```bash
|
||||
ssh deploy@staging.manacore.app
|
||||
cd ~/manacore-staging
|
||||
docker compose ps
|
||||
```
|
||||
|
||||
**Expected Results**:
|
||||
- All services running
|
||||
- Health checks passing
|
||||
|
||||
### 4. Test Production Deployment
|
||||
|
||||
1. Go to Actions > CD - Production Deployment
|
||||
2. Click "Run workflow"
|
||||
3. Select:
|
||||
- Service: `all`
|
||||
- Environment: `production`
|
||||
- Confirm: `deploy`
|
||||
4. Click "Run workflow"
|
||||
5. Approve when prompted
|
||||
|
||||
**Expected Results**:
|
||||
- ✅ Backup created
|
||||
- ✅ Deployment completes
|
||||
- ✅ Health checks pass
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Workflow Not Triggering
|
||||
|
||||
**Issue**: PR workflow doesn't run
|
||||
|
||||
**Solution**:
|
||||
- Check workflow file syntax
|
||||
- Verify branch protection rules
|
||||
- Check repository permissions
|
||||
|
||||
### Docker Build Fails
|
||||
|
||||
**Issue**: Image build fails in CI
|
||||
|
||||
**Solution**:
|
||||
```bash
|
||||
# Test build locally
|
||||
docker buildx build --file apps/chat/apps/backend/Dockerfile .
|
||||
|
||||
# Check for syntax errors
|
||||
yamllint .github/workflows/ci-main.yml
|
||||
```
|
||||
|
||||
### SSH Connection Fails
|
||||
|
||||
**Issue**: Can't connect to server from GitHub Actions
|
||||
|
||||
**Solution**:
|
||||
1. Verify SSH key is correct
|
||||
2. Check server firewall
|
||||
3. Verify user has docker permissions
|
||||
|
||||
```bash
|
||||
# Test locally
|
||||
ssh -i ~/.ssh/github-actions-staging deploy@staging.manacore.app 'docker ps'
|
||||
```
|
||||
|
||||
### Missing Secrets
|
||||
|
||||
**Issue**: Workflow fails with "secret not found"
|
||||
|
||||
**Solution**:
|
||||
1. Go to Settings > Secrets
|
||||
2. Verify secret name matches exactly
|
||||
3. Check for typos
|
||||
4. Ensure secret has value
|
||||
|
||||
## Maintenance
|
||||
|
||||
### Rotate SSH Keys
|
||||
|
||||
Every 90 days, rotate SSH keys:
|
||||
|
||||
```bash
|
||||
# Generate new keys
|
||||
ssh-keygen -t ed25519 -C "github-actions-$(date +%Y%m)" -f ~/.ssh/github-actions-new
|
||||
|
||||
# Add new public key to server
|
||||
ssh deploy@staging.manacore.app
|
||||
echo "ssh-ed25519 NEW_KEY..." >> ~/.ssh/authorized_keys
|
||||
|
||||
# Update GitHub secret with new private key
|
||||
# Test new key works
|
||||
# Remove old key from authorized_keys
|
||||
```
|
||||
|
||||
### Update Docker Credentials
|
||||
|
||||
Rotate Docker access tokens annually:
|
||||
|
||||
1. Generate new token in Docker Hub
|
||||
2. Update `DOCKER_PASSWORD` secret
|
||||
3. Test by triggering workflow
|
||||
|
||||
### Monitor Workflow Usage
|
||||
|
||||
Check Actions usage:
|
||||
1. Go to Settings > Billing
|
||||
2. Review Actions minutes used
|
||||
3. Set spending limits if needed
|
||||
|
||||
## Next Steps
|
||||
|
||||
1. [Read Deployment Guide](DEPLOYMENT.md)
|
||||
2. Configure monitoring
|
||||
3. Set up alerts
|
||||
4. Document runbooks
|
||||
5. Train team on deployment process
|
||||
|
|
@ -1,762 +1,92 @@
|
|||
# Deployment Guide
|
||||
|
||||
This guide covers the complete deployment process for the manacore-monorepo, including CI/CD setup, Docker orchestration, and production deployment strategies.
|
||||
|
||||
## Table of Contents
|
||||
|
||||
- [Overview](#overview)
|
||||
- [Prerequisites](#prerequisites)
|
||||
- [CI/CD Pipeline](#cicd-pipeline)
|
||||
- [Docker Setup](#docker-setup)
|
||||
- [Deployment Environments](#deployment-environments)
|
||||
- [Deployment Process](#deployment-process)
|
||||
- [Rollback Procedures](#rollback-procedures)
|
||||
- [Monitoring and Maintenance](#monitoring-and-maintenance)
|
||||
- [Troubleshooting](#troubleshooting)
|
||||
|
||||
## Overview
|
||||
|
||||
The manacore-monorepo uses a comprehensive CI/CD pipeline with the following features:
|
||||
|
||||
- **Automated Testing**: PR checks, type checking, linting, and format validation
|
||||
- **Smart Build Detection**: Only builds affected projects using Turborepo filters
|
||||
- **Docker Orchestration**: Multi-stage builds for all service types
|
||||
- **Zero-Downtime Deployments**: Rolling updates with health checks
|
||||
- **Automated Rollbacks**: Emergency rollback procedures
|
||||
- **Security Scanning**: Dependency audits and vulnerability checks
|
||||
|
||||
### Architecture
|
||||
Production runs on a **Mac Mini** accessible via Cloudflare Tunnel at **mana.how**.
|
||||
|
||||
```
|
||||
┌─────────────────┐
|
||||
│ GitHub PR │
|
||||
└────────┬────────┘
|
||||
│
|
||||
▼
|
||||
┌─────────────────┐
|
||||
│ PR Validation │ ← Lint, Type Check, Build, Test
|
||||
└────────┬────────┘
|
||||
│
|
||||
▼
|
||||
┌─────────────────┐
|
||||
│ Merge to Main │
|
||||
└────────┬────────┘
|
||||
│
|
||||
▼
|
||||
┌─────────────────┐
|
||||
│ Build & Push │ ← Docker images to registry
|
||||
│ Docker Images │
|
||||
└────────┬────────┘
|
||||
│
|
||||
▼
|
||||
┌─────────────────┐
|
||||
│ Deploy Staging │ ← Automatic deployment
|
||||
└────────┬────────┘
|
||||
│
|
||||
▼
|
||||
┌─────────────────┐
|
||||
│ Manual Approval │ ← Production gate
|
||||
└────────┬────────┘
|
||||
│
|
||||
▼
|
||||
┌─────────────────┐
|
||||
│Deploy Production│ ← With backup & health checks
|
||||
└─────────────────┘
|
||||
Push to main → CI builds Docker images → GHCR → Watchtower pulls & restarts
|
||||
(automatic) (automatic, ~5 min)
|
||||
```
|
||||
|
||||
## Prerequisites
|
||||
**Watchtower** automatically checks for new Docker images every 5 minutes and updates running containers.
|
||||
|
||||
### Required Tools
|
||||
## Quick Reference
|
||||
|
||||
- **Docker**: Version 20.10+
|
||||
- **Docker Compose**: Version 2.0+
|
||||
- **Node.js**: Version 20+
|
||||
- **pnpm**: Version 9.15.0
|
||||
- **Git**: Version 2.30+
|
||||
|
||||
### Required Accounts
|
||||
|
||||
- **GitHub**: Repository access and Actions enabled
|
||||
- **Docker Hub**: For image storage (or alternative registry)
|
||||
- **Supabase**: For database services
|
||||
- **Azure**: For OpenAI services
|
||||
- **Hetzner/Coolify**: For hosting (recommended)
|
||||
|
||||
### GitHub Secrets
|
||||
|
||||
Configure the following secrets in your GitHub repository (`Settings > Secrets and variables > Actions`):
|
||||
|
||||
#### Docker Registry
|
||||
|
||||
```
|
||||
DOCKER_USERNAME=your-docker-username
|
||||
DOCKER_PASSWORD=your-docker-password
|
||||
DOCKER_REGISTRY=wuesteon
|
||||
```
|
||||
|
||||
#### Staging Environment
|
||||
|
||||
```
|
||||
STAGING_HOST=staging.manacore.app
|
||||
STAGING_USER=deploy
|
||||
STAGING_SSH_KEY=<private-key>
|
||||
STAGING_POSTGRES_HOST=postgres
|
||||
STAGING_POSTGRES_PORT=5432
|
||||
STAGING_POSTGRES_DB=manacore
|
||||
STAGING_POSTGRES_USER=postgres
|
||||
STAGING_POSTGRES_PASSWORD=<secure-password>
|
||||
STAGING_REDIS_HOST=redis
|
||||
STAGING_REDIS_PORT=6379
|
||||
STAGING_REDIS_PASSWORD=<secure-password>
|
||||
STAGING_SUPABASE_URL=https://xxx.supabase.co
|
||||
STAGING_SUPABASE_ANON_KEY=<anon-key>
|
||||
STAGING_SUPABASE_SERVICE_ROLE_KEY=<service-role-key>
|
||||
STAGING_AZURE_OPENAI_ENDPOINT=https://xxx.openai.azure.com
|
||||
STAGING_AZURE_OPENAI_API_KEY=<api-key>
|
||||
STAGING_JWT_SECRET=<jwt-secret>
|
||||
STAGING_JWT_PUBLIC_KEY=<public-key>
|
||||
STAGING_JWT_PRIVATE_KEY=<private-key>
|
||||
```
|
||||
|
||||
#### Production Environment
|
||||
|
||||
```
|
||||
PRODUCTION_HOST=api.manacore.app
|
||||
PRODUCTION_USER=deploy
|
||||
PRODUCTION_SSH_KEY=<private-key>
|
||||
PRODUCTION_API_URL=https://api.manacore.app
|
||||
# ... (same structure as staging with production values)
|
||||
```
|
||||
|
||||
#### Turbo Cache (Optional)
|
||||
|
||||
```
|
||||
TURBO_TOKEN=<vercel-token>
|
||||
TURBO_TEAM=<team-name>
|
||||
```
|
||||
|
||||
#### Code Coverage (Optional)
|
||||
|
||||
```
|
||||
CODECOV_TOKEN=<codecov-token>
|
||||
```
|
||||
| Environment | Location | Domain |
|
||||
|-------------|----------|--------|
|
||||
| Local Dev | Your machine | localhost |
|
||||
| Production | Mac Mini | mana.how |
|
||||
|
||||
## CI/CD Pipeline
|
||||
|
||||
### Workflow Files
|
||||
### What happens automatically (CI)
|
||||
|
||||
The CI/CD pipeline consists of 6 GitHub Actions workflows:
|
||||
1. **Push to main** triggers CI workflow
|
||||
2. CI detects changed services
|
||||
3. Docker images are built for changed services
|
||||
4. Images are pushed to GitHub Container Registry (ghcr.io)
|
||||
|
||||
#### 1. PR Validation (`ci-pull-request.yml`)
|
||||
### What happens automatically (Watchtower)
|
||||
|
||||
**Triggers**: Pull requests to `main` or `develop`
|
||||
Watchtower runs as a Docker container and:
|
||||
1. Checks GHCR for new images every 5 minutes
|
||||
2. Pulls updated images
|
||||
3. Recreates containers with new images
|
||||
4. Cleans up old images
|
||||
|
||||
**Steps**:
|
||||
No manual action needed for regular deployments.
|
||||
|
||||
1. Detect changed projects
|
||||
2. Run format check
|
||||
3. Run linting
|
||||
4. Type checking
|
||||
5. Build affected projects
|
||||
6. Run tests with coverage
|
||||
7. Docker build validation
|
||||
8. Security scanning
|
||||
## Manual Deployment (if needed)
|
||||
|
||||
**Required Checks**: Format, Type Check, Build
|
||||
|
||||
#### 2. Main Branch CI (`ci-main.yml`)
|
||||
|
||||
**Triggers**: Push to `main` branch
|
||||
|
||||
**Steps**:
|
||||
|
||||
1. Full validation (all projects)
|
||||
2. Build all projects
|
||||
3. Build and push Docker images
|
||||
4. Trigger staging deployment
|
||||
|
||||
#### 3. Staging Deployment (`cd-staging.yml`)
|
||||
|
||||
**Triggers**: Manual or automated from main CI
|
||||
|
||||
**Steps**:
|
||||
|
||||
1. SSH to staging server
|
||||
2. Pull latest Docker images
|
||||
3. Update environment configuration
|
||||
4. Deploy services with zero-downtime
|
||||
5. Run database migrations
|
||||
6. Health checks
|
||||
7. Notify on completion
|
||||
|
||||
#### 4. Production Deployment (`cd-production.yml`)
|
||||
|
||||
**Triggers**: Manual only
|
||||
|
||||
**Steps**:
|
||||
|
||||
1. Validate deployment request
|
||||
2. Request manual approval
|
||||
3. Create database backup
|
||||
4. Deploy with rolling update
|
||||
5. Run migrations
|
||||
6. Health checks
|
||||
7. Monitor for 5 minutes
|
||||
8. Run smoke tests
|
||||
9. Notify on completion
|
||||
|
||||
#### 5. Test Coverage (`test-coverage.yml`)
|
||||
|
||||
**Triggers**: PRs, pushes to main, weekly schedule
|
||||
|
||||
**Steps**:
|
||||
|
||||
1. Run all tests with coverage
|
||||
2. Collect coverage reports
|
||||
3. Upload to Codecov
|
||||
4. Generate summary
|
||||
5. Check coverage thresholds (50% minimum)
|
||||
|
||||
#### 6. Dependency Updates (`dependency-update.yml`)
|
||||
|
||||
**Triggers**: Weekly schedule, manual
|
||||
|
||||
**Steps**:
|
||||
|
||||
1. Check for outdated dependencies
|
||||
2. Run security audit
|
||||
3. Create issue for critical vulnerabilities
|
||||
4. Update lock file
|
||||
5. Create PR with changes
|
||||
|
||||
### Change Detection
|
||||
|
||||
The pipeline uses `dorny/paths-filter` to detect which projects have changed:
|
||||
|
||||
```yaml
|
||||
filters:
|
||||
maerchenzauber:
|
||||
- 'apps/maerchenzauber/**'
|
||||
- 'packages/**'
|
||||
chat:
|
||||
- 'apps/chat/**'
|
||||
- 'packages/**'
|
||||
# ... other projects
|
||||
```
|
||||
|
||||
Only affected projects are built and tested, saving time and resources.
|
||||
|
||||
## Docker Setup
|
||||
|
||||
### Multi-Stage Builds
|
||||
|
||||
All Dockerfiles use multi-stage builds for optimal image size:
|
||||
|
||||
1. **Builder Stage**: Install dependencies and build
|
||||
2. **Production Stage**: Copy only production dependencies and built assets
|
||||
|
||||
### Service Types
|
||||
|
||||
#### NestJS Backend
|
||||
|
||||
Template: `docker/templates/Dockerfile.nestjs`
|
||||
|
||||
```dockerfile
|
||||
FROM node:20-alpine AS builder
|
||||
# Build with all dependencies
|
||||
|
||||
FROM node:20-alpine AS production
|
||||
# Production with minimal footprint
|
||||
```
|
||||
|
||||
**Key Features**:
|
||||
|
||||
- Non-root user (`nestjs`)
|
||||
- Health checks
|
||||
- Resource limits
|
||||
- Optimized caching
|
||||
|
||||
#### SvelteKit Web
|
||||
|
||||
Template: `docker/templates/Dockerfile.sveltekit`
|
||||
|
||||
**Key Features**:
|
||||
|
||||
- SSR support
|
||||
- Static asset optimization
|
||||
- Non-root user
|
||||
- Health endpoints
|
||||
|
||||
#### Astro Landing Pages
|
||||
|
||||
Template: `docker/templates/Dockerfile.astro`
|
||||
|
||||
**Key Features**:
|
||||
|
||||
- Nginx-based serving
|
||||
- Gzip compression
|
||||
- Security headers
|
||||
- Static file caching
|
||||
|
||||
### Docker Compose
|
||||
|
||||
Two environments are provided:
|
||||
|
||||
#### Staging (`docker-compose.staging.yml`)
|
||||
|
||||
- Includes PostgreSQL and Redis
|
||||
- Service discovery via Docker network
|
||||
- Local development configuration
|
||||
- Verbose logging
|
||||
|
||||
#### Production (`docker-compose.production.yml`)
|
||||
|
||||
- External database connections
|
||||
- Resource limits
|
||||
- Optimized logging
|
||||
- Security hardening
|
||||
|
||||
## Deployment Environments
|
||||
|
||||
### Staging
|
||||
|
||||
**Purpose**: Pre-production testing and validation
|
||||
|
||||
**URL**: `https://staging.manacore.app`
|
||||
|
||||
**Characteristics**:
|
||||
|
||||
- Automatic deployment from `main` branch
|
||||
- Separate database instances
|
||||
- Full feature parity with production
|
||||
- Verbose logging enabled
|
||||
|
||||
**Access**:
|
||||
For immediate deployment without waiting for Watchtower:
|
||||
|
||||
```bash
|
||||
ssh deploy@staging.manacore.app
|
||||
cd ~/manacore-staging
|
||||
docker compose ps
|
||||
ssh mana-server "cd ~/projects/manacore-monorepo && ./scripts/mac-mini/deploy.sh"
|
||||
```
|
||||
|
||||
### Production
|
||||
|
||||
**Purpose**: Live production environment
|
||||
|
||||
**URL**: `https://api.manacore.app`
|
||||
|
||||
**Characteristics**:
|
||||
|
||||
- Manual deployment with approval
|
||||
- High availability configuration
|
||||
- Performance optimized
|
||||
- Enhanced monitoring
|
||||
- Backup procedures
|
||||
|
||||
**Access**:
|
||||
## Monitoring
|
||||
|
||||
```bash
|
||||
ssh deploy@api.manacore.app
|
||||
cd ~/manacore-production
|
||||
docker compose ps
|
||||
```
|
||||
|
||||
## Deployment Process
|
||||
|
||||
### Automated Staging Deployment
|
||||
|
||||
Staging deployment happens automatically when code is merged to `main`:
|
||||
|
||||
```bash
|
||||
# 1. Create PR
|
||||
git checkout -b feature/my-feature
|
||||
git push origin feature/my-feature
|
||||
|
||||
# 2. PR Validation runs automatically
|
||||
# - Checks pass
|
||||
|
||||
# 3. Merge to main
|
||||
# - Main CI builds Docker images
|
||||
# - Pushes to registry
|
||||
# - Triggers staging deployment
|
||||
|
||||
# 4. Staging deployment
|
||||
# - Pulls latest images
|
||||
# - Rolling update
|
||||
# - Health checks
|
||||
# - Success!
|
||||
```
|
||||
|
||||
### Manual Production Deployment
|
||||
|
||||
Production requires manual trigger and approval:
|
||||
|
||||
#### Step 1: Trigger Deployment
|
||||
|
||||
Go to GitHub Actions > CD - Production Deployment > Run workflow
|
||||
|
||||
**Required Inputs**:
|
||||
|
||||
- Service: `all` or specific service name
|
||||
- Environment: `production`
|
||||
- Confirm: Type `deploy`
|
||||
|
||||
#### Step 2: Approval
|
||||
|
||||
Workflow pauses for manual approval at `production-approval` environment.
|
||||
|
||||
Approve in: GitHub > Settings > Environments > production-approval
|
||||
|
||||
#### Step 3: Automated Deployment
|
||||
|
||||
Once approved:
|
||||
|
||||
1. Creates database backup
|
||||
2. Tags current deployment
|
||||
3. Pulls latest images
|
||||
4. Runs migrations
|
||||
5. Rolling update (zero-downtime)
|
||||
6. Health checks
|
||||
7. 5-minute monitoring
|
||||
8. Smoke tests
|
||||
|
||||
#### Step 4: Verification
|
||||
|
||||
```bash
|
||||
# Check deployment status
|
||||
./scripts/deploy/health-check.sh production
|
||||
# Check service status
|
||||
ssh mana-server "./scripts/mac-mini/status.sh"
|
||||
|
||||
# View logs
|
||||
ssh deploy@api.manacore.app
|
||||
cd ~/manacore-production
|
||||
docker compose logs -f
|
||||
ssh mana-server "docker logs -f manacore-chat-backend"
|
||||
|
||||
# Health check
|
||||
ssh mana-server "./scripts/mac-mini/health-check.sh"
|
||||
```
|
||||
|
||||
### Manual Deployment Scripts
|
||||
## Services & URLs
|
||||
|
||||
For manual deployments or troubleshooting:
|
||||
| Service | URL | Container |
|
||||
|---------|-----|-----------|
|
||||
| Dashboard | https://mana.how | manacore-web |
|
||||
| Auth API | https://auth.mana.how | mana-core-auth |
|
||||
| Chat | https://chat.mana.how | chat-web |
|
||||
| Chat API | https://chat-api.mana.how | chat-backend |
|
||||
| Todo | https://todo.mana.how | todo-web |
|
||||
| Todo API | https://todo-api.mana.how | todo-backend |
|
||||
| Calendar | https://calendar.mana.how | calendar-web |
|
||||
| Calendar API | https://calendar-api.mana.how | calendar-backend |
|
||||
| Clock | https://clock.mana.how | clock-web |
|
||||
| Clock API | https://clock-api.mana.how | clock-backend |
|
||||
| Contacts | https://contacts.mana.how | contacts-web |
|
||||
| Contacts API | https://contacts-api.mana.how | contacts-backend |
|
||||
|
||||
#### Build and Push Images
|
||||
## Rollback
|
||||
|
||||
```bash
|
||||
# Build all services
|
||||
./scripts/deploy/build-and-push.sh all latest
|
||||
ssh mana-server
|
||||
cd ~/projects/manacore-monorepo
|
||||
|
||||
# Build specific service
|
||||
./scripts/deploy/build-and-push.sh chat-backend v1.2.3
|
||||
# Rollback to specific image tag
|
||||
docker compose -f docker-compose.macmini.yml pull <service>  # pin the desired <tag> in docker-compose.macmini.yml first — `pull` does not accept a tag argument
|
||||
docker compose -f docker-compose.macmini.yml up -d <service>
|
||||
```
|
||||
|
||||
#### Deploy to Server
|
||||
## Detailed Documentation
|
||||
|
||||
```bash
|
||||
# Deploy to staging
|
||||
export STAGING_HOST=staging.manacore.app
|
||||
export STAGING_USER=deploy
|
||||
./scripts/deploy/deploy-hetzner.sh staging all
|
||||
|
||||
# Deploy to production
|
||||
export PRODUCTION_HOST=api.manacore.app
|
||||
export PRODUCTION_USER=deploy
|
||||
./scripts/deploy/deploy-hetzner.sh production all
|
||||
```
|
||||
|
||||
#### Health Checks
|
||||
|
||||
```bash
|
||||
# Check staging
|
||||
./scripts/deploy/health-check.sh staging
|
||||
|
||||
# Check production
|
||||
./scripts/deploy/health-check.sh production
|
||||
```
|
||||
|
||||
#### Database Migrations
|
||||
|
||||
```bash
|
||||
# Run migrations for specific project
|
||||
./scripts/deploy/migrate-db.sh chat staging
|
||||
./scripts/deploy/migrate-db.sh mana-core-auth production
|
||||
```
|
||||
|
||||
## Rollback Procedures
|
||||
|
||||
### Automated Rollback (Recommended)
|
||||
|
||||
```bash
|
||||
# Rollback staging
|
||||
./scripts/deploy/rollback.sh staging all
|
||||
|
||||
# Rollback production (specific service)
|
||||
./scripts/deploy/rollback.sh production chat-backend
|
||||
```
|
||||
|
||||
**What the script does**:
|
||||
|
||||
1. Confirms rollback with user
|
||||
2. Checks for previous deployment backup
|
||||
3. Stops current services
|
||||
4. Restores previous docker-compose configuration
|
||||
5. Restores database (if applicable)
|
||||
6. Starts services with previous version
|
||||
7. Runs health checks
|
||||
8. Reports status
|
||||
|
||||
### Manual Rollback
|
||||
|
||||
If automated rollback fails:
|
||||
|
||||
```bash
|
||||
# SSH to server
|
||||
ssh deploy@api.manacore.app
|
||||
cd ~/manacore-production
|
||||
|
||||
# List available backups
|
||||
ls -lt backups/
|
||||
|
||||
# Choose backup
|
||||
BACKUP_DIR=backups/20250127_120000
|
||||
|
||||
# Restore configuration
|
||||
cp $BACKUP_DIR/docker-compose.yml ./docker-compose.yml
|
||||
cp $BACKUP_DIR/.env.backup ./.env
|
||||
|
||||
# Restore database (if needed)
|
||||
docker compose exec -T postgres psql -U postgres < $BACKUP_DIR/postgres_backup.sql
|
||||
|
||||
# Restart services
|
||||
docker compose up -d
|
||||
|
||||
# Check status
|
||||
docker compose ps
|
||||
```
|
||||
|
||||
## Monitoring and Maintenance
|
||||
|
||||
### Log Management
|
||||
|
||||
```bash
|
||||
# View logs for all services
|
||||
docker compose logs -f
|
||||
|
||||
# View logs for specific service
|
||||
docker compose logs -f mana-core-auth
|
||||
|
||||
# View last 100 lines
|
||||
docker compose logs --tail=100 chat-backend
|
||||
|
||||
# Search logs
|
||||
docker compose logs | grep ERROR
|
||||
```
|
||||
|
||||
### Resource Monitoring
|
||||
|
||||
```bash
|
||||
# Check container resources
|
||||
docker stats
|
||||
|
||||
# Check disk usage
|
||||
docker system df
|
||||
|
||||
# Cleanup unused resources
|
||||
docker system prune -a
|
||||
```
|
||||
|
||||
### Database Backups
|
||||
|
||||
Automated backups are created before each production deployment.
|
||||
|
||||
**Manual backup**:
|
||||
|
||||
```bash
|
||||
# Create backup
|
||||
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
|
||||
docker compose exec -T postgres pg_dumpall -U postgres > backup_$TIMESTAMP.sql
|
||||
|
||||
# Restore from backup
|
||||
docker compose exec -T postgres psql -U postgres < backup_20250127.sql
|
||||
```
|
||||
|
||||
### Health Monitoring
|
||||
|
||||
Set up external monitoring tools to ping health endpoints:
|
||||
|
||||
- Mana Core Auth: `https://api.manacore.app/api/v1/health`
|
||||
- Maerchenzauber: `https://api.manacore.app/health`
|
||||
- Chat Backend: `https://api.manacore.app/api/health`
|
||||
|
||||
Recommended tools:
|
||||
|
||||
- UptimeRobot
|
||||
- Pingdom
|
||||
- Better Uptime
|
||||
- Datadog
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Deployment Fails
|
||||
|
||||
**Issue**: Deployment workflow fails
|
||||
|
||||
**Solutions**:
|
||||
|
||||
1. Check workflow logs in GitHub Actions
|
||||
2. Verify all required secrets are set
|
||||
3. Ensure SSH access to server works
|
||||
4. Check Docker registry credentials
|
||||
|
||||
```bash
|
||||
# Test SSH access
|
||||
ssh deploy@staging.manacore.app 'echo "SSH works"'
|
||||
|
||||
# Test Docker login
|
||||
echo $DOCKER_PASSWORD | docker login -u $DOCKER_USERNAME --password-stdin
|
||||
```
|
||||
|
||||
### Health Checks Fail
|
||||
|
||||
**Issue**: Service fails health checks after deployment
|
||||
|
||||
**Solutions**:
|
||||
|
||||
1. Check service logs
|
||||
2. Verify environment variables
|
||||
3. Check database connectivity
|
||||
4. Verify port mappings
|
||||
|
||||
```bash
|
||||
# Check service logs
|
||||
docker compose logs --tail=200 mana-core-auth
|
||||
|
||||
# Test health endpoint directly
|
||||
docker compose exec mana-core-auth wget -O - http://localhost:3001/api/v1/health
|
||||
|
||||
# Check environment
|
||||
docker compose exec mana-core-auth env | grep -v PASSWORD
|
||||
```
|
||||
|
||||
### Database Connection Issues
|
||||
|
||||
**Issue**: Services can't connect to database
|
||||
|
||||
**Solutions**:
|
||||
|
||||
1. Verify database is running
|
||||
2. Check connection strings
|
||||
3. Verify credentials
|
||||
4. Check network connectivity
|
||||
|
||||
```bash
|
||||
# Check database status
|
||||
docker compose exec postgres psql -U postgres -c '\l'
|
||||
|
||||
# Test connection from service
|
||||
docker compose exec mana-core-auth nc -zv postgres 5432
|
||||
```
|
||||
|
||||
### Image Build Failures
|
||||
|
||||
**Issue**: Docker build fails in CI
|
||||
|
||||
**Solutions**:
|
||||
|
||||
1. Check Dockerfile syntax
|
||||
2. Verify all COPY paths exist
|
||||
3. Check for build dependency issues
|
||||
4. Review build logs
|
||||
|
||||
```bash
|
||||
# Test build locally
|
||||
docker buildx build --file apps/chat/apps/backend/Dockerfile .
|
||||
|
||||
# Build with verbose output
|
||||
docker buildx build --progress=plain --file apps/chat/apps/backend/Dockerfile .
|
||||
```
|
||||
|
||||
### Out of Disk Space
|
||||
|
||||
**Issue**: Server runs out of disk space
|
||||
|
||||
**Solutions**:
|
||||
|
||||
```bash
|
||||
# Check disk usage
|
||||
df -h
|
||||
|
||||
# Clean Docker resources
|
||||
docker system prune -a --volumes
|
||||
|
||||
# Remove old images
|
||||
docker image prune -a --filter "until=72h"
|
||||
|
||||
# Remove old backups
|
||||
cd ~/manacore-production/backups
|
||||
ls -t | tail -n +10 | xargs -r rm -rf  # -r: skip rm entirely when fewer than 10 backups exist
|
||||
```
|
||||
|
||||
### Services Not Starting
|
||||
|
||||
**Issue**: Docker Compose services fail to start
|
||||
|
||||
**Solutions**:
|
||||
|
||||
```bash
|
||||
# Check service dependencies
|
||||
docker compose config
|
||||
|
||||
# Start services one by one
|
||||
docker compose up -d postgres
|
||||
docker compose up -d redis
|
||||
docker compose up -d mana-core-auth
|
||||
|
||||
# Check startup logs
|
||||
docker compose logs --tail=100 --follow
|
||||
```
|
||||
|
||||
## Best Practices
|
||||
|
||||
### 1. Always Test in Staging First
|
||||
|
||||
Never deploy directly to production without testing in staging.
|
||||
|
||||
### 2. Use Tagged Releases
|
||||
|
||||
Tag important releases:
|
||||
|
||||
```bash
|
||||
git tag -a v1.2.3 -m "Release version 1.2.3"
|
||||
git push origin v1.2.3
|
||||
```
|
||||
|
||||
### 3. Monitor After Deployment
|
||||
|
||||
Watch logs and metrics for at least 30 minutes after production deployment.
|
||||
|
||||
### 4. Communicate Deployments
|
||||
|
||||
Notify team before production deployments, especially during business hours.
|
||||
|
||||
### 5. Keep Backups
|
||||
|
||||
Always verify backups are created before production deployments.
|
||||
|
||||
### 6. Document Changes
|
||||
|
||||
Update CHANGELOG.md with notable changes for each deployment.
|
||||
|
||||
### 7. Security
|
||||
|
||||
- Rotate secrets regularly
|
||||
- Keep dependencies updated
|
||||
- Review security audit reports
|
||||
- Use least-privilege access
|
||||
|
||||
## Support
|
||||
|
||||
For deployment issues or questions:
|
||||
|
||||
1. Check this documentation
|
||||
2. Review GitHub Actions logs
|
||||
3. Check service logs on server
|
||||
4. Contact DevOps team
|
||||
|
||||
**Emergency Contact**: DevOps on-call rotation
|
||||
- **[MAC_MINI_SERVER.md](MAC_MINI_SERVER.md)** - Complete server setup, autostart, health checks
|
||||
- **[LOCAL_DEVELOPMENT.md](LOCAL_DEVELOPMENT.md)** - Local development setup
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -1,949 +0,0 @@
|
|||
# Manacore Monorepo - Deployment Architecture Diagrams
|
||||
|
||||
**Visual representation of the deployment architecture**
|
||||
|
||||
---
|
||||
|
||||
## System Overview - High-Level Architecture
|
||||
|
||||
```
|
||||
┌────────────────────────────────────────────────────────────────────────────────────────┐
|
||||
│ MANACORE ECOSYSTEM │
|
||||
│ Production Deployment Architecture │
|
||||
└────────────────────────────────────────────────────────────────────────────────────────┘
|
||||
|
||||
[Internet Users]
|
||||
│
|
||||
│
|
||||
┌────────────────────┴────────────────────┐
|
||||
│ │
|
||||
▼ ▼
|
||||
┌──────────────────┐ ┌──────────────────┐
|
||||
│ Cloudflare CDN │ │ Cloudflare CDN │
|
||||
│ (Static Assets) │ │ (DDoS/Cache) │
|
||||
└────────┬─────────┘ └────────┬─────────┘
|
||||
│ │
|
||||
│ Astro Landing Pages │ App Traffic
|
||||
│ (Nginx/Static) │
|
||||
▼ ▼
|
||||
┌──────────────────┐ ┌──────────────────┐
|
||||
│ Landing Servers │ │ Coolify/K8s LB │
|
||||
│ - chat.app │ │ (Load Balancer) │
|
||||
│ - picture.app │ └────────┬─────────┘
|
||||
│ - memoro.app │ │
|
||||
└──────────────────┘ ┌─────────────────┼─────────────────┐
|
||||
│ │ │
|
||||
▼ ▼ ▼
|
||||
┌──────────────┐ ┌──────────────┐ ┌──────────────┐
|
||||
│ Web Apps │ │ API Backends │ │ Auth Service │
|
||||
│ (SvelteKit) │ │ (NestJS) │ │ (Core Auth) │
|
||||
├──────────────┤ ├──────────────┤ ├──────────────┤
|
||||
│ chat-web │ │chat-backend │ │mana-core-auth│
|
||||
│ picture-web │ │picture-api │ │ Port: 3001 │
|
||||
│ memoro-web │ │maerchen-api │ └──────┬───────┘
|
||||
│ ...9 apps │ │ ...10 APIs │ │
|
||||
└──────┬───────┘ └──────┬───────┘ │
|
||||
│ │ │
|
||||
└─────────────────┼─────────────────┘
|
||||
│
|
||||
┌─────────────────┴─────────────────┐
|
||||
│ │
|
||||
▼ ▼
|
||||
┌──────────────┐ ┌──────────────┐
|
||||
│ PostgreSQL │ │ Redis │
|
||||
│ (Supabase) │ │ (Cache) │
|
||||
├──────────────┤ ├──────────────┤
|
||||
│ chat_db │ │ Sessions │
|
||||
│ picture_db │ │ Credits │
|
||||
│ memoro_db │ │ Rate Limits │
|
||||
│ manacore_db │ └──────────────┘
|
||||
└──────────────┘
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Container Hierarchy - Docker Layer Structure
|
||||
|
||||
```
|
||||
┌────────────────────────────────────────────────────────────────────────────────────────┐
|
||||
│ MULTI-STAGE BUILD ARCHITECTURE │
|
||||
│ (Optimized for pnpm Workspace Monorepo) │
|
||||
└────────────────────────────────────────────────────────────────────────────────────────┘
|
||||
|
||||
[STAGE 1: BASE]
|
||||
│
|
||||
│ FROM node:20-alpine
|
||||
│ COPY pnpm-workspace.yaml
|
||||
│ COPY package.json
|
||||
│ COPY pnpm-lock.yaml
|
||||
│
|
||||
▼
|
||||
┌─────────────────────┐
|
||||
│ Workspace Setup │
|
||||
│ Size: ~150 MB │
|
||||
└──────────┬──────────┘
|
||||
│
|
||||
┌────────────┴────────────┐
|
||||
│ │
|
||||
▼ ▼
|
||||
[STAGE 2: DEPENDENCIES] [STAGE 2: DEPENDENCIES]
|
||||
│ │
|
||||
│ pnpm install │ pnpm install
|
||||
│ --frozen-lockfile │ --frozen-lockfile
|
||||
│ │
|
||||
▼ ▼
|
||||
┌─────────────────────┐ ┌─────────────────────┐
|
||||
│ Backend Dependencies│ │ Frontend Dependencies│
|
||||
│ Size: ~400 MB │ │ Size: ~500 MB │
|
||||
└──────────┬──────────┘ └──────────┬───────────┘
|
||||
│ │
|
||||
│ COPY packages/ │ COPY packages/
|
||||
│ RUN pnpm build │ RUN pnpm build
|
||||
│ │
|
||||
▼ ▼
|
||||
[STAGE 3: BUILDER] [STAGE 3: BUILDER]
|
||||
│ │
|
||||
│ COPY apps/*/backend │ COPY apps/*/web
|
||||
│ RUN pnpm build │ RUN pnpm build
|
||||
│ │
|
||||
▼ ▼
|
||||
┌─────────────────────┐ ┌─────────────────────┐
|
||||
│ Built Backend │ │ Built Frontend │
|
||||
│ (dist/) │ │ (build/) │
|
||||
│ Size: ~50 MB │ │ Size: ~20 MB │
|
||||
└──────────┬──────────┘ └──────────┬───────────┘
|
||||
│ │
|
||||
│ Multi-stage copy │ Multi-stage copy
|
||||
│ │
|
||||
▼ ▼
|
||||
[STAGE 4: PRODUCTION] [STAGE 4: PRODUCTION]
|
||||
│ │
|
||||
│ FROM node:20-alpine │ FROM node:20-alpine
|
||||
│ COPY --from=builder │ COPY --from=builder
|
||||
│ USER nodejs (1001) │ USER nodejs (1001)
|
||||
│ │
|
||||
▼ ▼
|
||||
┌─────────────────────┐ ┌─────────────────────┐
|
||||
│ chat-backend │ │ chat-web │
|
||||
│ Final: 180 MB │ │ Final: 170 MB │
|
||||
│ Port: 3002 │ │ Port: 3000 │
|
||||
└─────────────────────┘ └─────────────────────┘
|
||||
|
||||
[ASTRO LANDING PAGES]
|
||||
│
|
||||
│ FROM node:20-alpine (builder)
|
||||
│ RUN pnpm build (static files)
|
||||
│
|
||||
▼
|
||||
┌─────────────────────┐
|
||||
│ Static Build │
|
||||
│ (dist/) │
|
||||
│ Size: ~5 MB │
|
||||
└──────────┬──────────┘
|
||||
│
|
||||
│ FROM nginx:1.25-alpine
|
||||
│ COPY --from=builder dist/
|
||||
│
|
||||
▼
|
||||
┌─────────────────────┐
|
||||
│ chat-landing │
|
||||
│ Final: 45 MB │
|
||||
│ Port: 80 │
|
||||
└─────────────────────┘
|
||||
|
||||
CACHE BENEFITS:
|
||||
Layer 1 (Base): 99% cache hit rate (workspace config rarely changes)
|
||||
Layer 2 (Deps): 80% cache hit rate (dependencies change weekly)
|
||||
Layer 3 (Build): 0% cache hit rate (source code changes frequently)
|
||||
|
||||
TOTAL BUILD TIME:
|
||||
- Without cache: ~12-15 minutes
|
||||
- With cache: ~2-3 minutes
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Network Topology - Production Environment
|
||||
|
||||
```
|
||||
┌────────────────────────────────────────────────────────────────────────────────────────┐
|
||||
│ NETWORK ARCHITECTURE │
|
||||
│ (Ports, Protocols, Security) │
|
||||
└────────────────────────────────────────────────────────────────────────────────────────┘
|
||||
|
||||
┌─────────────────────────────────┐
|
||||
│ Internet (Public) │
|
||||
│ 0.0.0.0/0 │
|
||||
└────────────┬────────────────────┘
|
||||
│
|
||||
│ Port 443 (HTTPS)
|
||||
│ Port 80 (HTTP → 443 redirect)
|
||||
│
|
||||
▼
|
||||
┌─────────────────────────────────┐
|
||||
│ Cloudflare / Coolify Proxy │
|
||||
│ - DDoS Protection │
|
||||
│ - SSL Termination │
|
||||
│ - Rate Limiting │
|
||||
└────────────┬────────────────────┘
|
||||
│
|
||||
┌───────────────────────┼───────────────────────┐
|
||||
│ │ │
|
||||
▼ ▼ ▼
|
||||
┌──────────────────┐ ┌──────────────────┐ ┌──────────────────┐
|
||||
│ Frontend Net │ │ Backend Net │ │ Data Net │
|
||||
│ (Public) │ │ (Private) │ │ (Private) │
|
||||
└──────────────────┘ └──────────────────┘ └──────────────────┘
|
||||
│ │ │
|
||||
│ │ │
|
||||
┌───────┴───────┐ ┌───────┴───────┐ ┌───────┴───────┐
|
||||
│ │ │ │ │ │
|
||||
▼ ▼ ▼ ▼ ▼ ▼
|
||||
┌─────────┐ ┌─────────┐ ┌─────────┐ ┌─────────┐ ┌─────────┐ ┌─────────┐
|
||||
│ Nginx │ │SvelteKit│ │ NestJS │ │ NestJS │ │Postgres │ │ Redis │
|
||||
│ (Astro) │ │ (Web) │ │ Backend │ │ Auth │ │(Supabase)│ │ Cache │
|
||||
├─────────┤ ├─────────┤ ├─────────┤ ├─────────┤ ├─────────┤ ├─────────┤
|
||||
│Port: 80 │ │Port:3100│ │Port:3002│ │Port:3001│ │Port:5432│ │Port:6379│
|
||||
│Public │ │Internal │ │Internal │ │Internal │ │Internal │ │Internal │
|
||||
└─────────┘ └─────────┘ └────┬────┘ └────┬────┘ └─────────┘ └─────────┘
|
||||
│ │
|
||||
│ DB Conn │ DB Conn
|
||||
│ Pool: 10 │ Pool: 10
|
||||
│ │
|
||||
└───────────┴────────> PostgreSQL
|
||||
│
|
||||
└────────> Redis
|
||||
|
||||
NETWORK SECURITY RULES:
|
||||
|
||||
┌─────────────────────────────────────────────────────────────────┐
|
||||
│ INGRESS RULES (Firewall) │
|
||||
├─────────────────────────────────────────────────────────────────┤
|
||||
│ Port 22 (SSH) - Source: DevOps IPs only │
|
||||
│ Port 80 (HTTP) - Source: 0.0.0.0/0 (Redirect to 443) │
|
||||
│ Port 443 (HTTPS) - Source: 0.0.0.0/0 │
|
||||
│ Port 3001-3200 (Apps) - DENY (Internal only) │
|
||||
│ Port 5432 (PostgreSQL) - DENY (Internal only) │
|
||||
│ Port 6379 (Redis) - DENY (Internal only) │
|
||||
└─────────────────────────────────────────────────────────────────┘
|
||||
|
||||
┌─────────────────────────────────────────────────────────────────┐
|
||||
│ DOCKER NETWORK SEGMENTATION │
|
||||
├─────────────────────────────────────────────────────────────────┤
|
||||
│ frontend-network: SvelteKit, Astro, Nginx │
|
||||
│ backend-network: NestJS APIs, Auth Service │
|
||||
│ data-network: PostgreSQL, Redis (no internet access) │
|
||||
└─────────────────────────────────────────────────────────────────┘
|
||||
|
||||
SSL/TLS CONFIGURATION:
|
||||
|
||||
Certificate Provider: Let's Encrypt (Coolify auto-provision)
|
||||
Protocols: TLSv1.2, TLSv1.3
|
||||
Cipher Suites: HIGH:!aNULL:!MD5:!3DES
|
||||
HSTS: max-age=31536000; includeSubDomains; preload
|
||||
Certificate Renewal: Automatic (30 days before expiry)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Data Flow - Request Lifecycle
|
||||
|
||||
```
|
||||
┌────────────────────────────────────────────────────────────────────────────────────────┐
|
||||
│ REQUEST LIFECYCLE (Chat API Example) │
|
||||
└────────────────────────────────────────────────────────────────────────────────────────┘
|
||||
|
||||
[1] User Request
|
||||
│
|
||||
│ POST https://api-chat.manacore.app/api/chat/completions
|
||||
│ Headers: Authorization: Bearer <manaToken>
|
||||
│
|
||||
▼
|
||||
┌───────────────────────────┐
|
||||
│ Cloudflare Edge (CDN) │ ← Geographically closest data center
|
||||
│ - Check cache (miss) │
|
||||
│ - DDoS protection │
|
||||
│ - Rate limiting │
|
||||
└─────────────┬─────────────┘
|
||||
│
|
||||
│ HTTPS (TLS 1.3)
|
||||
│
|
||||
▼
|
||||
┌───────────────────────────┐
|
||||
│ Coolify Reverse Proxy │
|
||||
│ - SSL termination │
|
||||
│ - Route to container │
|
||||
│ - Health check │
|
||||
└─────────────┬─────────────┘
|
||||
│
|
||||
│ HTTP (internal network)
|
||||
│
|
||||
▼
|
||||
┌───────────────────────────┐
|
||||
│ Chat Backend (NestJS) │
|
||||
│ Container: chat-backend │
|
||||
│ Port: 3002 │
|
||||
└─────────────┬─────────────┘
|
||||
│
|
||||
│ [2] Authentication Middleware
|
||||
│
|
||||
▼
|
||||
┌───────────────────────────┐
|
||||
│ Verify JWT Token │
|
||||
│ ┌─────────────────────┐ │
|
||||
│ │ Extract manaToken │ │
|
||||
│ │ Decode JWT │ │
|
||||
│ │ Verify signature │ │
|
||||
│ │ Check expiry │ │
|
||||
│ └──────────┬──────────┘ │
|
||||
└─────────────┼─────────────┘
|
||||
│
|
||||
│ JWT Claims: { sub: userId, role: user, app_id: chat }
|
||||
│
|
||||
▼
|
||||
┌───────────────────────────┐
|
||||
│ Credits Check │
|
||||
│ ┌─────────────────────┐ │
|
||||
│ │ Query Redis cache │ │
|
||||
│ │ Key: credits:{id} │ │
|
||||
│ └──────────┬──────────┘ │
|
||||
└─────────────┼─────────────┘
|
||||
│
|
||||
│ Cache MISS
|
||||
│
|
||||
▼
|
||||
┌───────────────────────────┐
|
||||
│ Query PostgreSQL │
|
||||
│ ┌─────────────────────┐ │
|
||||
│ │ SELECT credits │ │
|
||||
│ │ FROM users │ │
|
||||
│ │ WHERE id = userId │ │
|
||||
│ └──────────┬──────────┘ │
|
||||
└─────────────┼─────────────┘
|
||||
│
|
||||
│ Credits: 50 (sufficient)
|
||||
│ Cache: SET credits:{id} 50 EX 300
|
||||
│
|
||||
▼
|
||||
┌───────────────────────────┐
|
||||
│ [3] Business Logic │
|
||||
│ ┌─────────────────────┐ │
|
||||
│ │ Parse request │ │
|
||||
│ │ Validate input │ │
|
||||
│ │ Call Azure OpenAI │ │
|
||||
│ └──────────┬──────────┘ │
|
||||
└─────────────┼─────────────┘
|
||||
│
|
||||
│ HTTP POST to Azure
|
||||
│
|
||||
▼
|
||||
┌───────────────────────────┐
|
||||
│ Azure OpenAI API │
|
||||
│ Model: GPT-4o-mini │
|
||||
│ Latency: ~800ms │
|
||||
└─────────────┬─────────────┘
|
||||
│
|
||||
│ AI Response
|
||||
│
|
||||
▼
|
||||
┌───────────────────────────┐
|
||||
│ [4] Save to Database │
|
||||
│ ┌─────────────────────┐ │
|
||||
│ │ INSERT message │ │
|
||||
│ │ UPDATE credits │ │
|
||||
│ │ (credits - 1) │ │
|
||||
│ └──────────┬──────────┘ │
|
||||
└─────────────┼─────────────┘
|
||||
│
|
||||
│ Transaction committed
|
||||
│ Invalidate cache: DEL credits:{id}
|
||||
│
|
||||
▼
|
||||
┌───────────────────────────┐
|
||||
│ [5] Return Response │
|
||||
│ ┌─────────────────────┐ │
|
||||
│ │ HTTP 200 OK │ │
|
||||
│ │ { │ │
|
||||
│ │ "message": "...", │ │
|
||||
│ │ "credits": 49 │ │
|
||||
│ │ } │ │
|
||||
│ └──────────┬──────────┘ │
|
||||
└─────────────┼─────────────┘
|
||||
│
|
||||
│ Response time: ~1.2s total
|
||||
│
|
||||
▼
|
||||
[6] User receives AI response
|
||||
|
||||
PERFORMANCE BREAKDOWN:
|
||||
- Cloudflare routing: ~20ms
|
||||
- SSL handshake: ~50ms (cached session)
|
||||
- Authentication: ~10ms (JWT decode)
|
||||
- Credits check (cache): ~2ms
|
||||
- Azure OpenAI call: ~800ms (largest latency)
|
||||
- Database write: ~15ms
|
||||
- Response serialization: ~5ms
|
||||
────────────────────────────────
|
||||
TOTAL: ~902ms (p95 latency target: <1s)
|
||||
|
||||
CACHING STRATEGY:
|
||||
✅ Redis: User credits (TTL: 5 min) - Reduces DB queries by 90%
|
||||
✅ Redis: AI model list (TTL: 1 hour) - Static metadata
|
||||
❌ No cache: Chat messages (always fresh from DB)
|
||||
❌ No cache: AI completions (unique per request)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Deployment Flow - CI/CD Pipeline
|
||||
|
||||
```
|
||||
┌────────────────────────────────────────────────────────────────────────────────────────┐
|
||||
│ CI/CD DEPLOYMENT PIPELINE │
|
||||
│ (GitHub Actions → Coolify) │
|
||||
└────────────────────────────────────────────────────────────────────────────────────────┘
|
||||
|
||||
[Developer]
|
||||
│
|
||||
│ git commit -m "feat: add chat model selector"
|
||||
│ git push origin feature/chat-model-selector
|
||||
│
|
||||
▼
|
||||
┌───────────────────────────┐
|
||||
│ GitHub (Pull Request) │
|
||||
│ - Code review │
|
||||
│ - Automated tests │
|
||||
└─────────────┬─────────────┘
|
||||
│
|
||||
│ PR approved & merged to main
|
||||
│
|
||||
▼
|
||||
┌───────────────────────────────────────────────────────────────────────────────────────┐
|
||||
│ GITHUB ACTIONS WORKFLOW │
|
||||
└───────────────────────────────────────────────────────────────────────────────────────┘
|
||||
|
||||
▼
|
||||
┌───────────────────────────┐
|
||||
│ Job 1: Lint & Type Check │ ← Parallel execution
|
||||
│ ┌─────────────────────┐ │
|
||||
│ │ pnpm lint │ │
|
||||
│ │ pnpm type-check │ │
|
||||
│ └──────────┬──────────┘ │
|
||||
└─────────────┼─────────────┘
|
||||
│ ✅ Passed
|
||||
│
|
||||
▼
|
||||
┌───────────────────────────┐
|
||||
│ Job 2: Build Docker Image│
|
||||
│ ┌─────────────────────┐ │
|
||||
│ │ docker buildx build │ │
|
||||
│ │ --cache-from cache │ │
|
||||
│ │ --cache-to cache │ │
|
||||
│ │ --push │ │
|
||||
│ └──────────┬──────────┘ │
|
||||
└─────────────┼─────────────┘
|
||||
│
|
||||
│ Image: ghcr.io/manacore/chat-backend:main-abc1234
|
||||
│
|
||||
▼
|
||||
┌───────────────────────────┐
|
||||
│ Job 3: Security Scan │
|
||||
│ ┌─────────────────────┐ │
|
||||
│ │ trivy image scan │ │
|
||||
│ │ Severity: HIGH+ │ │
|
||||
│ └──────────┬──────────┘ │
|
||||
└─────────────┼─────────────┘
|
||||
│ ✅ No critical vulnerabilities
|
||||
│
|
||||
▼
|
||||
┌───────────────────────────────────────────────────────────────────────────────────────┐
|
||||
│ STAGING DEPLOYMENT │
|
||||
└───────────────────────────────────────────────────────────────────────────────────────┘
|
||||
|
||||
▼
|
||||
┌───────────────────────────┐
|
||||
│ Deploy to Staging │
|
||||
│ ┌─────────────────────┐ │
|
||||
│ │ SSH to Coolify │ │
|
||||
│ │ docker compose pull │ │
|
||||
│ │ docker compose up │ │
|
||||
│ │ pnpm migration:run │ │
|
||||
│ └──────────┬──────────┘ │
|
||||
└─────────────┼─────────────┘
|
||||
│
|
||||
│ Staging URL: https://staging-api-chat.manacore.app
|
||||
│
|
||||
▼
|
||||
┌───────────────────────────┐
|
||||
│ Automated Smoke Tests │
|
||||
│ ┌─────────────────────┐ │
|
||||
│ │ curl /api/health │ │ ✅ 200 OK
|
||||
│ │ curl /api/models │ │ ✅ 200 OK
|
||||
│ │ POST /api/chat │ │ ✅ 200 OK
|
||||
│ └──────────┬──────────┘ │
|
||||
└─────────────┼─────────────┘
|
||||
│ ✅ All tests passed
|
||||
│
|
||||
▼
|
||||
┌───────────────────────────┐
|
||||
│ Manual Approval Required │ ← Human checkpoint
|
||||
│ ┌─────────────────────┐ │
|
||||
│ │ QA Team Review │ │
|
||||
│ │ Stakeholder Demo │ │
|
||||
│ │ Approve/Reject │ │
|
||||
│ └──────────┬──────────┘ │
|
||||
└─────────────┼─────────────┘
|
||||
│ ✅ Approved
|
||||
│
|
||||
▼
|
||||
┌───────────────────────────────────────────────────────────────────────────────────────┐
|
||||
│ PRODUCTION DEPLOYMENT (Blue-Green) │
|
||||
└───────────────────────────────────────────────────────────────────────────────────────┘
|
||||
|
||||
▼
|
||||
┌───────────────────────────┐
|
||||
│ Deploy to GREEN Env │
|
||||
│ ┌─────────────────────┐ │
|
||||
│ │ Blue: v1.5.2 (100%) │ │
|
||||
│ │ Green: v1.6.0 (0%) │ │
|
||||
│ │ │ │
|
||||
│ │ docker compose up │ │
|
||||
│ │ --file green.yml │ │
|
||||
│ └──────────┬──────────┘ │
|
||||
└─────────────┼─────────────┘
|
||||
│
|
||||
│ Wait 30 seconds for startup
|
||||
│
|
||||
▼
|
||||
┌───────────────────────────┐
|
||||
│ Run Database Migrations │
|
||||
│ ┌─────────────────────┐ │
|
||||
│ │ pnpm migration:run │ │ ← Forward-compatible migrations only
|
||||
│ └──────────┬──────────┘ │
|
||||
└─────────────┼─────────────┘
|
||||
│
|
||||
│ Migrations applied successfully
|
||||
│
|
||||
▼
|
||||
┌───────────────────────────┐
|
||||
│ Health Check GREEN │
|
||||
│ ┌─────────────────────┐ │
|
||||
│ │ curl localhost:3002 │ │ ✅ 200 OK
|
||||
│ │ /api/health │ │
|
||||
│ └──────────┬──────────┘ │
|
||||
└─────────────┼─────────────┘
|
||||
│
|
||||
│ GREEN environment healthy
|
||||
│
|
||||
▼
|
||||
┌───────────────────────────┐
|
||||
│ Canary Deployment │
|
||||
│ ┌─────────────────────┐ │
|
||||
│ │ Blue: 90% traffic │ │
|
||||
│ │ Green: 10% traffic │ │
|
||||
│ │ │ │
|
||||
│ │ Monitor for 10 min │ │
|
||||
│ └──────────┬──────────┘ │
|
||||
└─────────────┼─────────────┘
|
||||
│
|
||||
│ Metrics:
|
||||
│ - Error rate: 0.1% (✅ <1%)
|
||||
│ - Response time: 850ms (✅ <1s)
|
||||
│ - No customer complaints
|
||||
│
|
||||
▼
|
||||
┌───────────────────────────┐
|
||||
│ Full Cutover │
|
||||
│ ┌─────────────────────┐ │
|
||||
│ │ Blue: 0% traffic │ │
|
||||
│ │ Green: 100% traffic │ │
|
||||
│ └──────────┬──────────┘ │
|
||||
└─────────────┼─────────────┘
|
||||
│
|
||||
│ Traffic switched to GREEN
|
||||
│
|
||||
▼
|
||||
┌───────────────────────────┐
|
||||
│ Rollback Window (1 hour) │ ← Keep BLUE running
|
||||
│ ┌─────────────────────┐ │
|
||||
│ │ Monitor metrics │ │
|
||||
│ │ If issues: │ │
|
||||
│ │ Switch back BLUE │ │
|
||||
│ └──────────┬──────────┘ │
|
||||
└─────────────┼─────────────┘
|
||||
│
|
||||
│ ✅ No issues detected
|
||||
│
|
||||
▼
|
||||
┌───────────────────────────┐
|
||||
│ Decommission BLUE │
|
||||
│ ┌─────────────────────┐ │
|
||||
│ │ docker compose down │ │
|
||||
│ │ --file blue.yml │ │
|
||||
│ └──────────┬──────────┘ │
|
||||
└─────────────┼─────────────┘
|
||||
│
|
||||
│ Deployment completed successfully
|
||||
│
|
||||
▼
|
||||
[Production v1.6.0 Live]
|
||||
|
||||
DEPLOYMENT TIMELINE:
|
||||
- Code merge to main: 0:00
|
||||
- CI/CD pipeline start: 0:01
|
||||
- Lint & build: 0:05 (4 min)
|
||||
- Staging deployment: 0:07 (2 min)
|
||||
- Smoke tests: 0:08 (1 min)
|
||||
- Manual approval: 0:30 (22 min - human review)
|
||||
- Production deploy (GREEN): 0:35 (5 min)
|
||||
- Canary monitoring: 0:45 (10 min)
|
||||
- Full cutover: 0:46 (1 min)
|
||||
- Rollback window: 1:46 (60 min)
|
||||
─────────────────────────────────────────────
|
||||
TOTAL TIME TO PRODUCTION: ~2 hours (mostly manual approval)
|
||||
|
||||
ROLLBACK PROCEDURE (if needed):
|
||||
1. Detect issue (error spike, customer reports)
|
||||
2. Run: coolify switch-deployment chat blue
|
||||
3. Traffic reverts to BLUE (v1.5.2) in <30 seconds
|
||||
4. Investigate issue in GREEN (offline)
|
||||
5. Fix and redeploy when ready
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Monitoring Dashboard Layout
|
||||
|
||||
```
|
||||
┌────────────────────────────────────────────────────────────────────────────────────────┐
|
||||
│ GRAFANA MONITORING DASHBOARD │
|
||||
│ (Real-time Metrics) │
|
||||
└────────────────────────────────────────────────────────────────────────────────────────┘
|
||||
|
||||
┌─────────────────────────────────────────────────────────────────────────────────────┐
|
||||
│ SYSTEM HEALTH OVERVIEW Last Update: 12:34:56 │
|
||||
├─────────────────────────────────────────────────────────────────────────────────────┤
|
||||
│ │
|
||||
│ ┌───────────────┐ ┌───────────────┐ ┌───────────────┐ ┌───────────────┐ │
|
||||
│ │ Services │ │ Request Rate │ │ Error Rate │ │ Avg Latency │ │
|
||||
│ │ 38 / 39 │ │ 1,234 req/s │ │ 0.2% │ │ 450 ms │ │
|
||||
│ │ 🟢 Healthy │ │ 🟢 Normal │ │ 🟢 Good │ │ 🟢 Fast │ │
|
||||
│ └───────────────┘ └───────────────┘ └───────────────┘ └───────────────┘ │
|
||||
│ │
|
||||
│ ⚠️ 1 Service Warning: picture-backend (High Memory: 85%) │
|
||||
│ │
|
||||
└─────────────────────────────────────────────────────────────────────────────────────┘
|
||||
|
||||
┌─────────────────────────────────────────────────────────────────────────────────────┐
|
||||
│ SERVICE STATUS (by Project) │
|
||||
├─────────────────────────────────────────────────────────────────────────────────────┤
|
||||
│ │
|
||||
│ Project │ Backend │ Web │ Landing │ Status │ Last Deploy │
|
||||
│ ─────────────────┼─────────┼────────┼─────────┼────────┼─────────────────────── │
|
||||
│ mana-core-auth │ 🟢 UP │ - │ - │ 100% │ 2025-11-26 10:23 │
|
||||
│ chat │ 🟢 UP │ 🟢 UP │ 🟢 UP │ 100% │ 2025-11-27 12:15 │
|
||||
│ maerchenzauber │ 🟢 UP │ 🟢 UP │ 🟢 UP │ 100% │ 2025-11-25 14:45 │
|
||||
│ picture │ 🟡 WARN│ 🟢 UP │ 🟢 UP │ 100% │ 2025-11-27 08:30 │
|
||||
│ memoro │ - │ 🟢 UP │ 🟢 UP │ 100% │ 2025-11-26 16:00 │
|
||||
│ uload │ 🟢 UP │ 🟢 UP │ 🟢 UP │ 100% │ 2025-11-24 11:20 │
|
||||
│ │
|
||||
└─────────────────────────────────────────────────────────────────────────────────────┘
|
||||
|
||||
┌─────────────────────────────────────────────────────────────────────────────────────┐
|
||||
│ RESPONSE TIME (p95 Latency) [Last 24 hours] │
|
||||
├─────────────────────────────────────────────────────────────────────────────────────┤
|
||||
│ │
|
||||
│ 1000ms │ ╭╮ │
|
||||
│ │ ╭╯╰╮ │
|
||||
│ 800ms │ ╭╮ ╭╯ ╰╮ │
|
||||
│ │ ╭╯╰╮ ╭╯ ╰╮ │
|
||||
│ 600ms │ ╭╮ ╭╯ ╰╮ ╭╯ ╰╮ │
|
||||
│ │ ╭╮ ╭╯╰╮ ╭╯ ╰╮╭╯ ╰╮ │
|
||||
│ 400ms │─────────╭╯╰───────╯──╰──╯──────╰╯──────────╰────────── │
|
||||
│ │ ╭╯ │
|
||||
│ 200ms │ ╭────╯ │
|
||||
│ │───╯ │
|
||||
│ 0ms └─────────────────────────────────────────────────────────────────────── │
|
||||
│ 0h 6h 12h 18h 24h │
|
||||
│ │
|
||||
│ Legend: ─ chat-backend ─ picture-backend ─ Target (500ms) │
|
||||
│ │
|
||||
└─────────────────────────────────────────────────────────────────────────────────────┘
|
||||
|
||||
┌─────────────────────────────────────────────────────────────────────────────────────┐
|
||||
│ RESOURCE UTILIZATION │
|
||||
├─────────────────────────────────────────────────────────────────────────────────────┤
|
||||
│ │
|
||||
│ CPU Usage (%) Memory Usage (%) Disk I/O (MB/s) │
|
||||
│ ┌────────────────┐ ┌────────────────┐ ┌────────────────┐ │
|
||||
│ │ [████████░░] 45│ │ [██████░░░░] 60│ │ [███░░░░░░░] 30│ │
|
||||
│ └────────────────┘ └────────────────┘ └────────────────┘ │
|
||||
│ │
|
||||
│ Top Consumers: Top Consumers: Top Consumers: │
|
||||
│ 1. picture-api 25% 1. picture-api 85% 1. postgres 25 MB/s │
|
||||
│ 2. chat-api 10% 2. chat-web 70% 2. redis 3 MB/s │
|
||||
│ 3. postgres 8% 3. postgres 60% 3. chat-api 2 MB/s │
|
||||
│ │
|
||||
└─────────────────────────────────────────────────────────────────────────────────────┘
|
||||
|
||||
┌─────────────────────────────────────────────────────────────────────────────────────┐
|
||||
│ ACTIVE ALERTS │
|
||||
├─────────────────────────────────────────────────────────────────────────────────────┤
|
||||
│ │
|
||||
│ ⚠️ WARNING │ picture-backend │ High Memory Usage (85% > 80%) │ 12:30:15 │
|
||||
│ ℹ️ INFO │ chat-backend │ Slow Query Detected (250ms) │ 12:28:42 │
|
||||
│ │
|
||||
│ 🔕 No Critical Alerts │
|
||||
│ │
|
||||
└─────────────────────────────────────────────────────────────────────────────────────┘
|
||||
|
||||
┌─────────────────────────────────────────────────────────────────────────────────────┐
|
||||
│ DATABASE PERFORMANCE │
|
||||
├─────────────────────────────────────────────────────────────────────────────────────┤
|
||||
│ │
|
||||
│ Database │ Connections │ Query Time (avg) │ Slow Queries │ Cache Hit Rate │
|
||||
│ ───────────────┼─────────────┼──────────────────┼──────────────┼────────────── │
|
||||
│ chat │ 8 / 10 │ 45 ms │ 3 │ 98.5% │
|
||||
│ picture │ 9 / 10 │ 62 ms │ 8 │ 96.2% │
|
||||
│ manacore │ 5 / 10 │ 28 ms │ 0 │ 99.1% │
|
||||
│ │
|
||||
│ 🔍 View Slow Queries │ 📊 Connection Pool Analysis │
|
||||
│ │
|
||||
└─────────────────────────────────────────────────────────────────────────────────────┘
|
||||
|
||||
┌─────────────────────────────────────────────────────────────────────────────────────┐
|
||||
│ EXTERNAL DEPENDENCIES │
|
||||
├─────────────────────────────────────────────────────────────────────────────────────┤
|
||||
│ │
|
||||
│ Service │ Status │ Latency │ Success Rate │ Last Check │
|
||||
│ ─────────────────────┼─────────┼─────────┼──────────────┼──────────────────── │
|
||||
│ Azure OpenAI │ 🟢 UP │ 850 ms │ 99.9% │ 12:34:50 │
|
||||
│ Supabase (chat) │ 🟢 UP │ 35 ms │ 100% │ 12:34:52 │
|
||||
│ Supabase (picture) │ 🟢 UP │ 42 ms │ 100% │ 12:34:48 │
|
||||
│ Redis Cache │ 🟢 UP │ 2 ms │ 100% │ 12:34:55 │
|
||||
│ │
|
||||
└─────────────────────────────────────────────────────────────────────────────────────┘
|
||||
|
||||
ACTION BUTTONS:
|
||||
[🔄 Refresh Dashboard] [📥 Export Data] [🔔 Configure Alerts] [📖 View Logs]
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Disaster Recovery Flowchart
|
||||
|
||||
```
|
||||
┌────────────────────────────────────────────────────────────────────────────────────────┐
|
||||
│ DISASTER RECOVERY DECISION TREE │
|
||||
└────────────────────────────────────────────────────────────────────────────────────────┘
|
||||
|
||||
[INCIDENT DETECTED]
|
||||
│
|
||||
│ Alert triggered or customer report
|
||||
│
|
||||
▼
|
||||
┌──────────────────┐
|
||||
│ What failed? │
|
||||
└────────┬─────────┘
|
||||
│
|
||||
┌────────────────────┼────────────────────┐
|
||||
│ │ │
|
||||
▼ ▼ ▼
|
||||
┌──────────────┐ ┌──────────────┐ ┌──────────────┐
|
||||
│ Service │ │ Database │ │ Full Server │
|
||||
│ Crash │ │ Corruption │ │ Failure │
|
||||
└──────┬───────┘ └──────┬───────┘ └──────┬───────┘
|
||||
│ │ │
|
||||
▼ ▼ ▼
|
||||
┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐
|
||||
│ Health check │ │ Verify scope │ │ Verify total │
|
||||
│ failing? │ │ of corruption │ │ server down │
|
||||
└────────┬────────┘ └────────┬────────┘ └────────┬────────┘
|
||||
│ │ │
|
||||
▼ YES ▼ Database DOWN ▼ YES
|
||||
┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐
|
||||
│ Restart │ │ Stop affected │ │ Activate │
|
||||
│ container │ │ services │ │ standby server │
|
||||
├─────────────────┤ ├─────────────────┤ ├─────────────────┤
|
||||
│ docker compose │ │ docker compose │ │ 1. Start services│
|
||||
│ restart │ │ stop chat-api │ │ 2. Restore DBs │
|
||||
│ chat-backend │ │ │ │ 3. Update DNS │
|
||||
└────────┬────────┘ └────────┬────────┘ └────────┬────────┘
|
||||
│ │ │
|
||||
│ Wait 30s │ Download backup │ ETA: 2 hours
|
||||
│ │ │
|
||||
▼ ▼ ▼
|
||||
┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐
|
||||
│ Health check │ │ Restore from │ │ Verify services │
|
||||
│ passing? │ │ latest backup │ │ healthy │
|
||||
└────────┬────────┘ ├─────────────────┤ └────────┬────────┘
|
||||
│ │ pg_restore │ │
|
||||
▼ YES │ chat.dump │ ▼ YES
|
||||
┌─────────────────┐ └────────┬────────┘ ┌─────────────────┐
|
||||
│ ✅ RESOLVED │ │ │ ✅ RESOLVED │
|
||||
│ RTO: 2 min │ ▼ DB UP │ RTO: 2 hours │
|
||||
└─────────────────┘ ┌─────────────────┐ └─────────────────┘
|
||||
│ Restart services│
|
||||
├─────────────────┤
|
||||
│ docker compose │
|
||||
│ start chat-api │
|
||||
└────────┬────────┘
|
||||
│
|
||||
▼ Services UP
|
||||
┌─────────────────┐
|
||||
│ Verify data │
|
||||
│ integrity │
|
||||
└────────┬────────┘
|
||||
│
|
||||
▼ Verified
|
||||
┌─────────────────┐
|
||||
│ ✅ RESOLVED │
|
||||
│ RTO: 20 min │
|
||||
│ RPO: <24 hours │
|
||||
└─────────────────┘
|
||||
|
||||
POST-INCIDENT ACTIONS (All Scenarios):
|
||||
1. Document timeline in incident log
|
||||
2. Notify stakeholders of resolution
|
||||
3. Schedule post-mortem meeting
|
||||
4. Identify root cause
|
||||
5. Implement preventive measures
|
||||
6. Update runbooks
|
||||
|
||||
ESCALATION PATHS:
|
||||
- Service crash (2+ restarts fail) → Call DevOps lead
|
||||
- Database corruption → Call Database admin + CTO
|
||||
- Full server failure → Call Infrastructure team + CEO
|
||||
- Security breach → Call Security team + Legal
|
||||
|
||||
COMMUNICATION TEMPLATE:
|
||||
Subject: [INCIDENT] Service Downtime - chat-backend
|
||||
|
||||
Status: INVESTIGATING / RESOLVED
|
||||
Impact: API requests failing (100% error rate)
|
||||
Affected Users: ~500 active users
|
||||
Started: 2025-11-27 12:34 UTC
|
||||
Resolved: 2025-11-27 12:38 UTC (4 min)
|
||||
RTO: 2 minutes
|
||||
|
||||
Timeline:
|
||||
- 12:34 UTC: Alert triggered (health check fail)
|
||||
- 12:35 UTC: Container restarted
|
||||
- 12:36 UTC: Health check passing
|
||||
- 12:38 UTC: Verified all API endpoints working
|
||||
|
||||
Root Cause: OOM killer terminated process (memory leak)
|
||||
|
||||
Action Items:
|
||||
1. Increase memory limit to 1GB (from 512MB)
|
||||
2. Add memory monitoring alert
|
||||
3. Investigate memory leak in code
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Legend & Symbols
|
||||
|
||||
```
|
||||
┌────────────────────────────────────────────────────────────────────────────────────────┐
|
||||
│ DIAGRAM LEGEND & SYMBOLS │
|
||||
└────────────────────────────────────────────────────────────────────────────────────────┘
|
||||
|
||||
STATUS INDICATORS:
|
||||
🟢 - Healthy / Running / Success
|
||||
🟡 - Warning / Degraded Performance
|
||||
🔴 - Critical / Down / Failed
|
||||
⚪ - Unknown / Not Monitored
|
||||
⚠️ - Warning Alert
|
||||
🚨 - Critical Alert
|
||||
ℹ️ - Informational Message
|
||||
|
||||
NETWORK SYMBOLS:
|
||||
│ - Vertical connection
|
||||
─ - Horizontal connection
|
||||
┌ └ ┐ ┘ - Corners
|
||||
├ ┤ ┬ ┴ ┼ - Junctions
|
||||
→ ← - Data flow direction
|
||||
▼ ▲ - Process flow direction
|
||||
|
||||
SERVICE TYPES:
|
||||
[NestJS] - Backend API service
|
||||
[SvelteKit]- Web frontend service
|
||||
[Astro] - Static landing page
|
||||
[Postgres] - Database
|
||||
[Redis] - Cache/session store
|
||||
[Nginx] - Reverse proxy / static server
|
||||
|
||||
SECURITY LEVELS:
|
||||
Public - Accessible from internet (0.0.0.0/0)
|
||||
Internal - Private network only (Docker network)
|
||||
Protected - Firewall rules + authentication required
|
||||
|
||||
DEPLOYMENT STAGES:
|
||||
Development - Local Docker Compose
|
||||
Staging - Coolify (separate server)
|
||||
Production - Coolify (production server)
|
||||
|
||||
ABBREVIATIONS:
|
||||
RTO - Recovery Time Objective
|
||||
RPO - Recovery Point Objective
|
||||
CDN - Content Delivery Network
|
||||
SSL - Secure Sockets Layer
|
||||
TLS - Transport Layer Security
|
||||
HSTS - HTTP Strict Transport Security
|
||||
CORS - Cross-Origin Resource Sharing
|
||||
JWT - JSON Web Token
|
||||
ORM - Object-Relational Mapping
|
||||
APM - Application Performance Monitoring
|
||||
CI/CD- Continuous Integration / Continuous Deployment
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Quick Reference
|
||||
|
||||
### Health Check URLs
|
||||
|
||||
```
|
||||
mana-core-auth: https://auth.manacore.app/api/health
|
||||
chat-backend: https://api-chat.manacore.app/api/health
|
||||
chat-web: https://app-chat.manacore.app/api/health
|
||||
picture-backend: https://api-picture.manacore.app/api/health
|
||||
maerchenzauber-backend:https://api-maerchenzauber.manacore.app/api/health
|
||||
```
|
||||
|
||||
### Emergency Contacts
|
||||
|
||||
```
|
||||
DevOps Lead: +XX XXX XXX XXXX (on-call: Mon-Fri 9-5)
|
||||
Database Admin: +XX XXX XXX XXXX (on-call: 24/7)
|
||||
Infrastructure: devops@manacore.app
|
||||
Security Team: security@manacore.app
|
||||
Status Page: https://status.manacore.app
|
||||
```
|
||||
|
||||
### Common Commands
|
||||
|
||||
```bash
|
||||
# Restart service
|
||||
docker compose restart chat-backend
|
||||
|
||||
# View logs (last 100 lines)
|
||||
docker compose logs --tail 100 -f chat-backend
|
||||
|
||||
# Check resource usage
|
||||
docker stats
|
||||
|
||||
# Rollback deployment
|
||||
./scripts/rollback.sh chat v1.5.2
|
||||
|
||||
# Restore database
|
||||
./scripts/restore-db.sh chat 2025-11-27
|
||||
|
||||
# Run health checks
|
||||
./scripts/health-check-all.sh
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
**End of Deployment Diagrams**
|
||||
|
|
@ -1,602 +0,0 @@
|
|||
# Hetzner Deployment Guide
|
||||
|
||||
Dieses Dokument beschreibt verschiedene Deployment-Optionen für das Manacore Monorepo auf Hetzner Cloud Infrastructure.
|
||||
|
||||
## Inhaltsverzeichnis
|
||||
|
||||
- [Bestandsaufnahme](#bestandsaufnahme)
|
||||
- [Option 1: Single Server](#option-1-single-server-einfach--günstig)
|
||||
- [Option 2: Dual-Server mit Floating IP](#option-2-dual-server-mit-floating-ip)
|
||||
- [Option 3: Kubernetes Cluster](#option-3-kubernetes-cluster-enterprise)
|
||||
- [Option 4: Hybrid mit Docker Swarm](#option-4-hybrid-mit-docker-swarm-empfohlen)
|
||||
- [Vergleichstabelle](#vergleichstabelle)
|
||||
- [Empfehlung](#empfehlung)
|
||||
- [Implementierungsdetails](#implementierungsdetails)
|
||||
|
||||
---
|
||||
|
||||
## Bestandsaufnahme
|
||||
|
||||
### Zu deployende Komponenten
|
||||
|
||||
| Typ | Anzahl | Technologie | Deployment-Ziel |
|
||||
|-----|--------|-------------|-----------------|
|
||||
| **Backends** | 10 | NestJS | Container |
|
||||
| **Web Apps** | 11 | SvelteKit (SSR) | Container |
|
||||
| **Landing Pages** | 11 | Astro (statisch) | CDN/Static |
|
||||
| **Auth Service** | 1 | NestJS | Container |
|
||||
| **Datenbanken** | 2 | PostgreSQL + Redis | Dedicated/Managed |
|
||||
| **Mobile Apps** | 10 | Expo | App Stores (nicht Hetzner) |
|
||||
|
||||
### Backend-Services im Detail
|
||||
|
||||
| Service | Package | Port | Datenbank |
|
||||
|---------|---------|------|-----------|
|
||||
| mana-core-auth | `mana-core-auth` | 3001 | PostgreSQL + Redis |
|
||||
| Chat Backend | `@chat/backend` | 3002 | PostgreSQL |
|
||||
| Maerchenzauber Backend | `@maerchenzauber/backend` | 3003 | Supabase |
|
||||
| Manadeck Backend | `@manadeck/backend` | 3004 | Supabase |
|
||||
| Picture Backend | `@picture/backend` | 3005 | PostgreSQL |
|
||||
| Transcriber Backend | `@transcriber/backend` | 3006 | Filesystem |
|
||||
| Nutriphi Backend | `@nutriphi/backend` | 3007 | Supabase |
|
||||
| News API | `@news/api` | 3008 | PostgreSQL |
|
||||
| Quote Backend | `@quote/backend` | 3009 | PostgreSQL |
|
||||
| Uload Backend | `@uload/backend` | 3010 | PostgreSQL |
|
||||
|
||||
### Ressourcenanforderungen (geschätzt)
|
||||
|
||||
| Komponente | RAM | CPU | Storage |
|
||||
|------------|-----|-----|---------|
|
||||
| NestJS Backend (pro Service) | 200-400 MB | 0.25 vCPU | 100 MB |
|
||||
| SvelteKit Web App (pro App) | 150-300 MB | 0.25 vCPU | 50 MB |
|
||||
| PostgreSQL | 1-2 GB | 1 vCPU | 10-50 GB |
|
||||
| Redis | 256-512 MB | 0.25 vCPU | 1 GB |
|
||||
| Traefik/Nginx | 128 MB | 0.25 vCPU | 100 MB |
|
||||
|
||||
**Gesamt (Minimum):** ~8 GB RAM, 4 vCPU, 100 GB Storage
|
||||
|
||||
---
|
||||
|
||||
## Option 1: Single Server (Einfach & Günstig)
|
||||
|
||||
### Kosten: ~€30-50/Monat
|
||||
|
||||
### Architektur
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────┐
|
||||
│ Hetzner CX41/CX51 │
|
||||
│ (8 vCPU, 16-32 GB RAM) │
|
||||
├─────────────────────────────────────────────────────────┤
|
||||
│ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │
|
||||
│ │ Traefik │ │ Docker │ │ PostgreSQL │ │
|
||||
│ │ (Reverse │ │ Compose │ │ Redis │ │
|
||||
│ │ Proxy) │ │ (All Apps) │ │ │ │
|
||||
│ └─────────────┘ └─────────────┘ └─────────────┘ │
|
||||
│ │
|
||||
│ Backends: 10 Container (~200MB RAM each) │
|
||||
│ Web Apps: 10 Container (SSR) │
|
||||
│ Landing: Statisch via Traefik │
|
||||
└─────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
### Hetzner Server Empfehlung
|
||||
|
||||
| Server | vCPU | RAM | Storage | Preis |
|
||||
|--------|------|-----|---------|-------|
|
||||
| CX41 | 8 | 16 GB | 160 GB | ~€28/Monat |
|
||||
| CX51 | 16 | 32 GB | 240 GB | ~€58/Monat |
|
||||
|
||||
### Vorteile
|
||||
|
||||
- Einfache Verwaltung
|
||||
- Günstig
|
||||
- Schnelle Einrichtung
|
||||
- Ein Server = ein Backup
|
||||
|
||||
### Nachteile
|
||||
|
||||
- Kein Failover (Single Point of Failure)
|
||||
- Downtime bei Updates
|
||||
- Keine horizontale Skalierung
|
||||
- Server-Ausfall = kompletter Ausfall
|
||||
|
||||
### Wann geeignet?
|
||||
|
||||
- Entwicklung/Staging
|
||||
- MVP/Early Stage
|
||||
- Budget-kritische Projekte
|
||||
- Wenig Traffic (<1000 DAU)
|
||||
|
||||
---
|
||||
|
||||
## Option 2: Dual-Server mit Floating IP
|
||||
|
||||
### Kosten: ~€80-120/Monat
|
||||
|
||||
### Architektur
|
||||
|
||||
```
|
||||
┌─────────────────┐
|
||||
│ Floating IP │
|
||||
│ (Failover) │
|
||||
└────────┬────────┘
|
||||
│
|
||||
┌──────────────┴──────────────┐
|
||||
│ │
|
||||
┌─────────▼─────────┐ ┌──────────▼─────────┐
|
||||
│ Server 1 (CX31) │ │ Server 2 (CX31) │
|
||||
│ PRIMARY │ │ STANDBY │
|
||||
├───────────────────┤ ├────────────────────┤
|
||||
│ • Traefik │ │ • Traefik │
|
||||
│ • All Backends │◄─────►│ • All Backends │
|
||||
│ • Web Apps │ sync │ • Web Apps │
|
||||
│ • PostgreSQL │ │ • PostgreSQL │
|
||||
│ (Primary) │ │ (Replica) │
|
||||
│ • Redis │ │ • Redis Sentinel │
|
||||
└───────────────────┘ └────────────────────┘
|
||||
│ │
|
||||
└──────────────┬──────────────┘
|
||||
│
|
||||
┌────────▼────────┐
|
||||
│ Hetzner Volume │
|
||||
│ (Shared Data) │
|
||||
└─────────────────┘
|
||||
```
|
||||
|
||||
### Komponenten
|
||||
|
||||
| Komponente | Funktion |
|
||||
|------------|----------|
|
||||
| **Floating IP** | Virtuelle IP, die zwischen Servern wechseln kann |
|
||||
| **Keepalived** | VRRP-Daemon für automatisches Failover |
|
||||
| **PostgreSQL Streaming Replication** | Echtzeit-Datenbank-Replikation |
|
||||
| **Redis Sentinel** | Redis High Availability |
|
||||
| **Litestream/pgBackRest** | Kontinuierliche Backups |
|
||||
|
||||
### Server-Konfiguration
|
||||
|
||||
```yaml
|
||||
# Server 1 & 2 identisch
|
||||
Server: CX31
|
||||
vCPU: 4
|
||||
RAM: 8 GB
|
||||
Storage: 80 GB
|
||||
Kosten: ~€15/Monat pro Server
|
||||
|
||||
# Zusätzlich
|
||||
Floating IP: €4/Monat
|
||||
Volume (100GB): €4.40/Monat
|
||||
```
|
||||
|
||||
### Failover-Prozess
|
||||
|
||||
1. Keepalived erkennt Server-Ausfall (Health Check)
|
||||
2. Floating IP wird auf Standby-Server umgeleitet (~30 Sekunden)
|
||||
3. PostgreSQL Replica wird zu Primary promoted
|
||||
4. Redis Sentinel wählt neuen Master
|
||||
|
||||
### Vorteile
|
||||
|
||||
- Automatisches Failover (~30 Sekunden)
|
||||
- Keine Downtime bei Updates (Rolling)
|
||||
- Datenbank-Replikation
|
||||
- Gutes Preis-Leistungs-Verhältnis
|
||||
|
||||
### Nachteile
|
||||
|
||||
- Mehr Komplexität als Single Server
|
||||
- PostgreSQL Failover kann komplex sein
|
||||
- Keepalived-Konfiguration erforderlich
|
||||
|
||||
### Wann geeignet?
|
||||
|
||||
- Produktions-Workloads
|
||||
- 99.9% Uptime-Anforderung
|
||||
- Mittlerer Traffic (1000-10000 DAU)
|
||||
|
||||
---
|
||||
|
||||
## Option 3: Kubernetes Cluster (Enterprise)
|
||||
|
||||
### Kosten: ~€150-300/Monat
|
||||
|
||||
### Architektur
|
||||
|
||||
```
|
||||
┌─────────────────┐
|
||||
│ Hetzner LB │
|
||||
│ (Cloud-native) │
|
||||
└────────┬────────┘
|
||||
│
|
||||
┌─────────────────────────┼─────────────────────────┐
|
||||
│ │ │
|
||||
┌───────▼───────┐ ┌────────▼────────┐ ┌────────▼───────┐
|
||||
│ Node 1 │ │ Node 2 │ │ Node 3 │
|
||||
│ (CX21) │ │ (CX21) │ │ (CX21) │
|
||||
├───────────────┤ ├─────────────────┤ ├────────────────┤
|
||||
│ k3s Worker │ │ k3s Worker │ │ k3s Worker │
|
||||
│ • Pods │ │ • Pods │ │ • Pods │
|
||||
│ • Services │ │ • Services │ │ • Services │
|
||||
└───────────────┘ └─────────────────┘ └────────────────┘
|
||||
│ │ │
|
||||
└─────────────────────────┼─────────────────────────┘
|
||||
│
|
||||
┌─────────────┴─────────────┐
|
||||
│ │
|
||||
┌────────▼────────┐ ┌──────────▼─────────┐
|
||||
│ Hetzner Managed │ │ Hetzner Volume │
|
||||
│ PostgreSQL │ │ (Persistent) │
|
||||
│ (Optional) │ │ │
|
||||
└─────────────────┘ └────────────────────┘
|
||||
```
|
||||
|
||||
### Kubernetes Stack
|
||||
|
||||
```yaml
|
||||
Cluster:
|
||||
- k3s (leichtgewichtiges Kubernetes)
|
||||
- 3 Nodes minimum für HA Control Plane
|
||||
|
||||
Ingress:
|
||||
- Traefik (in k3s integriert)
|
||||
- oder NGINX Ingress Controller
|
||||
|
||||
TLS:
|
||||
- cert-manager
|
||||
- Let's Encrypt (automatische Zertifikate)
|
||||
|
||||
Storage:
|
||||
- Longhorn (Distributed Block Storage)
|
||||
- oder Hetzner CSI Driver
|
||||
|
||||
GitOps:
|
||||
- ArgoCD oder Flux
|
||||
- Automatische Deployments aus Git
|
||||
|
||||
Monitoring:
|
||||
- Prometheus
|
||||
- Grafana
|
||||
- Alertmanager
|
||||
|
||||
Logging:
|
||||
- Loki
|
||||
- Promtail
|
||||
```
|
||||
|
||||
### Server-Konfiguration
|
||||
|
||||
```yaml
|
||||
# k3s Nodes
|
||||
3x CX21:
|
||||
vCPU: 2
|
||||
RAM: 4 GB
|
||||
Storage: 40 GB
|
||||
Kosten: ~€6/Monat pro Node = €18/Monat
|
||||
|
||||
# Oder für mehr Ressourcen
|
||||
3x CX31:
|
||||
vCPU: 4
|
||||
RAM: 8 GB
|
||||
Storage: 80 GB
|
||||
Kosten: ~€15/Monat pro Node = €45/Monat
|
||||
|
||||
# Load Balancer
|
||||
Hetzner LB: €5/Monat
|
||||
|
||||
# Volumes für Persistent Storage
|
||||
3x 50GB Volumes: ~€7/Monat
|
||||
```
|
||||
|
||||
### Vorteile
|
||||
|
||||
- Auto-Scaling (Horizontal Pod Autoscaler)
|
||||
- Self-Healing (automatischer Pod-Restart)
|
||||
- Rolling Updates ohne Downtime
|
||||
- Deklarative Konfiguration
|
||||
- Multi-Zone möglich
|
||||
- Industry Standard
|
||||
|
||||
### Nachteile
|
||||
|
||||
- Hohe Komplexität
|
||||
- Steile Lernkurve
|
||||
- Overhead für kleine Teams
|
||||
- Mehr Ressourcen für Control Plane
|
||||
|
||||
### Wann geeignet?
|
||||
|
||||
- Enterprise-Anforderungen
|
||||
- Großes Team mit K8s-Erfahrung
|
||||
- Hoher Traffic (>10000 DAU)
|
||||
- Microservices-Architektur
|
||||
- Multi-Tenant-Anforderungen
|
||||
|
||||
---
|
||||
|
||||
## Option 4: Hybrid mit Docker Swarm (Empfohlen)
|
||||
|
||||
### Kosten: ~€100-150/Monat (Basis-Infrastruktur ~€50-55/Monat laut Breakdown unten; Rest für Traffic, Backups und Reserven)
|
||||
|
||||
### Architektur
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────────┐
|
||||
│ HETZNER CLOUD │
|
||||
├─────────────────────────────────────────────────────────────────┤
|
||||
│ │
|
||||
│ ┌─────────────────┐ ┌─────────────────┐ │
|
||||
│ │ Load Balancer │ │ Cloud Firewall │ │
|
||||
│ │ (Hetzner LB) │ │ │ │
|
||||
│ └────────┬────────┘ └──────────────────┘ │
|
||||
│ │ │
|
||||
│ ┌────────┴────────────────────────────────┐ │
|
||||
│ │ │ │
|
||||
│ ▼ ▼ │
|
||||
│ ┌──────────────────┐ ┌──────────────────┐ │
|
||||
│ │ App Server 1 │ │ App Server 2 │ │
|
||||
│ │ (CX31) │ │ (CX31) │ │
|
||||
│ ├──────────────────┤ ├──────────────────┤ │
|
||||
│ │ Docker Swarm │◄────────────►│ Docker Swarm │ │
|
||||
│ │ Manager + Worker │ Overlay │ Manager + Worker │ │
|
||||
│ │ │ Network │ │ │
|
||||
│ │ • All Backends │ │ • All Backends │ │
|
||||
│ │ • Web Apps │ │ • Web Apps │ │
|
||||
│ │ • Traefik │ │ • Traefik │ │
|
||||
│ └──────────────────┘ └──────────────────┘ │
|
||||
│ │ │ │
|
||||
│ └────────────────┬───────────────┘ │
|
||||
│ │ │
|
||||
│ ┌────────▼────────┐ │
|
||||
│ │ DB Server │ │
|
||||
│ │ (CX21) │ │
|
||||
│ ├─────────────────┤ │
|
||||
│ │ • PostgreSQL 16 │ │
|
||||
│ │ • Redis 7 │ │
|
||||
│ │ • Daily Backups │ │
|
||||
│ │ → Object │ │
|
||||
│ │ Storage │ │
|
||||
│ └─────────────────┘ │
|
||||
│ │
|
||||
│ ┌─────────────────────────────────────────────────────┐ │
|
||||
│ │ Hetzner Object Storage │ │
|
||||
│ │ (Backups, Static Assets, Media) │ │
|
||||
│ └─────────────────────────────────────────────────────┘ │
|
||||
│ │
|
||||
└─────────────────────────────────────────────────────────────────┘
|
||||
|
||||
│
|
||||
▼
|
||||
┌───────────────────────────────┐
|
||||
│ EXTERNAL CDN │
|
||||
│ (Cloudflare Free) │
|
||||
│ • Static Assets │
|
||||
│ • DDoS Protection │
|
||||
│ • SSL Termination │
|
||||
└───────────────────────────────┘
|
||||
```
|
||||
|
||||
### Warum Docker Swarm?
|
||||
|
||||
Docker Swarm bietet die wichtigsten Features von Kubernetes mit deutlich weniger Komplexität:
|
||||
|
||||
| Feature | Docker Swarm | Kubernetes |
|
||||
|---------|--------------|------------|
|
||||
| Lernkurve | Niedrig | Hoch |
|
||||
| Setup-Zeit | Minuten | Stunden/Tage |
|
||||
| Service Discovery | Built-in | Benötigt Config |
|
||||
| Load Balancing | Built-in | Benötigt Ingress |
|
||||
| Rolling Updates | Built-in | Built-in |
|
||||
| Secrets Management | Built-in | Built-in |
|
||||
| Ressourcen-Overhead | Minimal | Signifikant |
|
||||
|
||||
### Server-Konfiguration
|
||||
|
||||
```yaml
|
||||
# App Server 1 & 2
|
||||
2x CX31:
|
||||
vCPU: 4
|
||||
RAM: 8 GB
|
||||
Storage: 80 GB
|
||||
Kosten: €15/Monat × 2 = €30/Monat
|
||||
|
||||
# Database Server
|
||||
1x CX21:
|
||||
vCPU: 2
|
||||
RAM: 4 GB
|
||||
Storage: 40 GB + 100GB Volume
|
||||
Kosten: €6/Monat + €4.40/Monat = €10.40/Monat
|
||||
|
||||
# Load Balancer
|
||||
Hetzner LB:
|
||||
Kosten: €5/Monat
|
||||
|
||||
# Object Storage (Backups)
|
||||
100 GB:
|
||||
Kosten: ~€5/Monat
|
||||
|
||||
# Cloud Firewall
|
||||
Kostenlos
|
||||
|
||||
# Private Network
|
||||
Kostenlos
|
||||
|
||||
─────────────────────────────
|
||||
Gesamt: ~€50-55/Monat Basis
|
||||
+ Traffic-Kosten
|
||||
```
|
||||
|
||||
### Docker Swarm Stack
|
||||
|
||||
```yaml
|
||||
# docker-stack.yml
|
||||
version: "3.8"
|
||||
|
||||
services:
|
||||
# Reverse Proxy
|
||||
traefik:
|
||||
image: traefik:v3.0
|
||||
deploy:
|
||||
replicas: 2
|
||||
placement:
|
||||
constraints:
|
||||
- node.role == manager
|
||||
ports:
|
||||
- "80:80"
|
||||
- "443:443"
|
||||
volumes:
|
||||
- /var/run/docker.sock:/var/run/docker.sock:ro
|
||||
- traefik-certs:/letsencrypt
|
||||
|
||||
# Auth Service
|
||||
mana-core-auth:
|
||||
image: ghcr.io/your-org/mana-core-auth:latest
|
||||
deploy:
|
||||
replicas: 2
|
||||
update_config:
|
||||
parallelism: 1
|
||||
delay: 10s
|
||||
failure_action: rollback
|
||||
restart_policy:
|
||||
condition: on-failure
|
||||
environment:
|
||||
- DATABASE_URL=postgresql://...
|
||||
labels:
|
||||
- "traefik.http.routers.auth.rule=Host(`auth.yourdomain.com`)"
|
||||
|
||||
# Backend Services (repeat for each)
|
||||
chat-backend:
|
||||
image: ghcr.io/your-org/chat-backend:latest
|
||||
deploy:
|
||||
replicas: 2
|
||||
labels:
|
||||
- "traefik.http.routers.chat-api.rule=Host(`api.chat.yourdomain.com`)"
|
||||
|
||||
# Web Apps (repeat for each)
|
||||
chat-web:
|
||||
image: ghcr.io/your-org/chat-web:latest
|
||||
deploy:
|
||||
replicas: 2
|
||||
labels:
|
||||
- "traefik.http.routers.chat-web.rule=Host(`chat.yourdomain.com`)"
|
||||
|
||||
volumes:
|
||||
traefik-certs:
|
||||
|
||||
networks:
|
||||
default:
|
||||
driver: overlay
|
||||
attachable: true
|
||||
```
|
||||
|
||||
### Vorteile
|
||||
|
||||
- Einfacher als Kubernetes
|
||||
- Native Docker-Erfahrung nutzbar
|
||||
- Built-in Service Discovery & Load Balancing
|
||||
- Rolling Updates ohne Downtime
|
||||
- Overlay-Network für sichere Kommunikation
|
||||
- Hetzner LB für echte HA
|
||||
|
||||
### Nachteile
|
||||
|
||||
- Weniger Features als Kubernetes
|
||||
- Kleineres Ökosystem
|
||||
- Kein HPA (Horizontal Pod Autoscaler)
|
||||
|
||||
### Wann geeignet?
|
||||
|
||||
- Produktions-Workloads
|
||||
- Kleine bis mittlere Teams
|
||||
- Docker-Erfahrung vorhanden
|
||||
- Mittlerer Traffic (1000-50000 DAU)
|
||||
|
||||
---
|
||||
|
||||
## Vergleichstabelle
|
||||
|
||||
| Feature | Option 1 | Option 2 | Option 3 | Option 4 |
|
||||
|---------|----------|----------|----------|----------|
|
||||
| **Kosten/Monat** | €30-50 | €80-120 | €150-300 | €100-150 |
|
||||
| **Ausfallsicherheit** | ❌ | ✅ | ✅✅ | ✅ |
|
||||
| **Auto-Failover** | ❌ | ✅ (30s) | ✅ (<10s) | ✅ (10-30s) |
|
||||
| **Komplexität** | Niedrig | Mittel | Hoch | Mittel |
|
||||
| **Skalierbarkeit** | ❌ | ⚠️ | ✅✅ | ✅ |
|
||||
| **Zero-Downtime Deploy** | ❌ | ✅ | ✅ | ✅ |
|
||||
| **Wartungsaufwand** | Niedrig | Mittel | Hoch | Mittel |
|
||||
| **Backup/Recovery** | Manuell | Auto | Auto | Auto |
|
||||
| **Setup-Zeit** | 1 Tag | 2-3 Tage | 1 Woche | 2-3 Tage |
|
||||
| **Team-Größe** | 1 Person | 1-2 Personen | 2+ Personen | 1-2 Personen |
|
||||
|
||||
---
|
||||
|
||||
## Empfehlung
|
||||
|
||||
### Für Manacore Monorepo: **Option 4 (Hybrid mit Docker Swarm)**
|
||||
|
||||
**Begründung:**
|
||||
|
||||
1. **Richtige Balance** zwischen Komplexität und Features
|
||||
2. **Docker Swarm** ist deutlich einfacher als Kubernetes, bietet aber:
|
||||
- Service Discovery
|
||||
- Load Balancing
|
||||
- Rolling Updates
|
||||
- Health Checks
|
||||
- Secrets Management
|
||||
3. **Hetzner Load Balancer** für echte HA ohne komplexe Floating-IP-Konfiguration
|
||||
4. **Separater DB-Server** für:
|
||||
- Bessere Performance
|
||||
- Einfachere Backups
|
||||
- Unabhängige Skalierung
|
||||
5. **Cloudflare** als kostenloses CDN + DDoS-Schutz
|
||||
6. **Object Storage** für Backups und Media-Dateien
|
||||
|
||||
### Migrationspfad
|
||||
|
||||
```
|
||||
Option 1 (Dev/Staging)
|
||||
↓
|
||||
Option 4 (Production)
|
||||
↓
|
||||
Option 3 (bei Bedarf für Enterprise-Scale)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Implementierungsdetails
|
||||
|
||||
### Nächste Schritte
|
||||
|
||||
1. **Dockerfiles erstellen** für alle Services
|
||||
2. **CI/CD Pipeline** mit GitHub Actions
|
||||
3. **Hetzner Infrastruktur** provisionieren (Terraform)
|
||||
4. **Docker Swarm** einrichten
|
||||
5. **Monitoring** mit Prometheus/Grafana
|
||||
6. **Backup-Strategie** implementieren
|
||||
|
||||
### Geschätzte Implementierungszeit
|
||||
|
||||
| Phase | Dauer | Beschreibung |
|
||||
|-------|-------|--------------|
|
||||
| Dockerfiles | 2-3 Tage | Alle Services containerisieren |
|
||||
| CI/CD | 1-2 Tage | GitHub Actions Pipelines |
|
||||
| Infrastruktur | 1 Tag | Hetzner Setup (Terraform) |
|
||||
| Swarm Setup | 1 Tag | Cluster initialisieren |
|
||||
| Deployment | 1-2 Tage | Services deployen & testen |
|
||||
| Monitoring | 1 Tag | Prometheus, Grafana, Alerts |
|
||||
| **Gesamt** | **~1-2 Wochen** | |
|
||||
|
||||
---
|
||||
|
||||
## Weiterführende Dokumente
|
||||
|
||||
- [DOCKERFILES.md](./DOCKERFILES.md) - Docker-Konfiguration für alle Services
|
||||
- [CI_CD.md](./CI_CD.md) - GitHub Actions Pipelines
|
||||
- [TERRAFORM.md](./TERRAFORM.md) - Infrastructure as Code
|
||||
- [MONITORING.md](./MONITORING.md) - Prometheus & Grafana Setup
|
||||
- [BACKUP_STRATEGY.md](./BACKUP_STRATEGY.md) - Backup & Recovery
|
||||
|
||||
---
|
||||
|
||||
*Erstellt: November 2025*
|
||||
*Letzte Aktualisierung: November 2025*
|
||||
File diff suppressed because it is too large
Load diff
|
|
@ -1,750 +0,0 @@
|
|||
# Docker Setup Analysis - Current State
|
||||
|
||||
**Analysis Date**: 2025-12-01
|
||||
**Scope**: Complete monorepo Docker configuration for Hetzner deployment
|
||||
|
||||
## Executive Summary
|
||||
|
||||
The monorepo has **solid Docker foundations** with multi-environment compose files and containerized services, but requires **critical fixes** before production deployment to Hetzner.
|
||||
|
||||
**Status**: ⚠️ **Not Production Ready** - 4 critical blockers identified
|
||||
|
||||
---
|
||||
|
||||
## Table of Contents
|
||||
|
||||
- [Docker Files Inventory](#docker-files-inventory)
|
||||
- [Current Architecture](#current-architecture)
|
||||
- [Containerized Services](#containerized-services)
|
||||
- [Critical Blocking Issues](#critical-blocking-issues)
|
||||
- [Configuration Gaps](#configuration-gaps)
|
||||
- [Best Practices Currently Followed](#best-practices-currently-followed)
|
||||
- [Immediate Actions Required](#immediate-actions-required)
|
||||
|
||||
---
|
||||
|
||||
## Docker Files Inventory
|
||||
|
||||
### Root-Level Compose Files
|
||||
|
||||
| File | Lines | Purpose | Status |
|
||||
|------|-------|---------|--------|
|
||||
| `docker-compose.yml` | 190 | Full production stack with Traefik, PostgreSQL, Redis, PgBouncer, Prometheus, Grafana | ⚠️ Missing configs |
|
||||
| `docker-compose.dev.yml` | 117 | Development setup with minimal infrastructure | ✅ Working |
|
||||
| `docker-compose.staging.yml` | 273 | Staging environment with 5 backends and registry images | ✅ Working |
|
||||
| `docker-compose.production.yml` | 253 | Production deployment with resource constraints | ⚠️ Missing external services |
|
||||
|
||||
### Active Service Dockerfiles
|
||||
|
||||
| Service | Path | Base Image | Status |
|
||||
|---------|------|------------|--------|
|
||||
| mana-core-auth | `services/mana-core-auth/Dockerfile` | Node 20-alpine | ✅ Working |
|
||||
| chat-backend | `apps/chat/apps/backend/Dockerfile` | Node 20-alpine | ✅ Working |
|
||||
| picture-backend | `apps/picture/apps/backend/Dockerfile` | Node 20-alpine | ✅ Working |
|
||||
| manadeck-backend | `apps/manadeck/apps/backend/Dockerfile` | Node 18 | ❌ Inconsistent |
|
||||
|
||||
### Docker Templates (Reusable)
|
||||
|
||||
```
|
||||
docker/templates/
|
||||
├── Dockerfile.nestjs # Multi-service NestJS template
|
||||
├── Dockerfile.sveltekit # SvelteKit web app template
|
||||
└── Dockerfile.astro # Astro static site with Nginx
|
||||
```
|
||||
|
||||
### Supporting Infrastructure
|
||||
|
||||
```
|
||||
docker/
|
||||
├── init-db/
|
||||
│ └── 01-create-databases.sql # Database initialization
|
||||
├── nginx/
|
||||
│ └── astro.conf # Nginx config for static sites
|
||||
├── prometheus/
|
||||
│ └── prometheus.yml # ❌ MISSING
|
||||
└── grafana/
|
||||
└── provisioning/ # ❌ MISSING
|
||||
```
|
||||
|
||||
### Entrypoint Scripts
|
||||
|
||||
- `services/mana-core-auth/docker-entrypoint.sh` ✅
|
||||
- `apps/chat/apps/backend/docker-entrypoint.sh` ✅
|
||||
- `apps/picture/apps/backend/docker-entrypoint.sh` ✅
|
||||
- `apps/manadeck/apps/backend/docker-entrypoint.sh` ❌ Missing
|
||||
|
||||
---
|
||||
|
||||
## Current Architecture
|
||||
|
||||
### Development Environment
|
||||
|
||||
**File**: `docker-compose.dev.yml`
|
||||
|
||||
```
|
||||
Services:
|
||||
- PostgreSQL 16-alpine (port 5432)
|
||||
- Redis 7-alpine (port 6379)
|
||||
- Optional services via profiles ("auth", "chat", "all")
|
||||
|
||||
Network: manacore-network (bridge)
|
||||
Health Checks: 10-second intervals
|
||||
Restart Policy: unless-stopped
|
||||
```
|
||||
|
||||
**Purpose**: Minimal stack for local development with hot reload support.
|
||||
|
||||
### Staging Environment
|
||||
|
||||
**File**: `docker-compose.staging.yml`
|
||||
|
||||
```
|
||||
Services:
|
||||
- 5 backend microservices (maerchenzauber, chat, manadeck, nutriphi, news)
|
||||
- PostgreSQL and Redis infrastructure
|
||||
- Nginx reverse proxy (ports 80/443)
|
||||
|
||||
Images: Pre-built from Docker registry
|
||||
Health Checks: 30-second intervals
|
||||
Logging: Structured JSON (10MB max-size, 3 files)
|
||||
Network: manacore-staging (bridge)
|
||||
```
|
||||
|
||||
**Purpose**: Pre-production testing environment.
|
||||
|
||||
### Production Environment
|
||||
|
||||
**File**: `docker-compose.production.yml`
|
||||
|
||||
```
|
||||
Services:
|
||||
- 5 backend microservices only (no web apps)
|
||||
- External PostgreSQL/Redis (not containerized)
|
||||
|
||||
Ports: All bound to 127.0.0.1 (localhost only)
|
||||
Resource Constraints: 1-2 CPUs, 512MB-1GB memory per service
|
||||
Volumes: None (external services)
|
||||
Network: manacore-production (bridge)
|
||||
```
|
||||
|
||||
**Purpose**: Minimal application footprint for managed infrastructure.
|
||||
|
||||
### Full Infrastructure Stack
|
||||
|
||||
**File**: `docker-compose.yml`
|
||||
|
||||
```
|
||||
Services:
|
||||
- Traefik v3.0 (reverse proxy with Let's Encrypt SSL)
|
||||
- PostgreSQL 16-alpine + PgBouncer (connection pooling)
|
||||
- Redis 7-alpine (session management)
|
||||
- Prometheus (metrics collection) ⚠️ Missing config
|
||||
- Grafana (monitoring dashboards) ⚠️ Missing provisioning
|
||||
|
||||
Features:
|
||||
- Automatic SSL via Traefik
|
||||
- Database connection pooling
|
||||
- Metrics collection
|
||||
- Dashboard monitoring
|
||||
```
|
||||
|
||||
**Purpose**: Complete on-premises deployment with monitoring.
|
||||
|
||||
---
|
||||
|
||||
## Containerized Services
|
||||
|
||||
### Active & Containerized
|
||||
|
||||
| Service | Technology | Port | Status |
|
||||
|---------|------------|------|--------|
|
||||
| mana-core-auth | NestJS | 3001 | ✅ Production Ready |
|
||||
| chat-backend | NestJS | 3002 | ✅ Production Ready |
|
||||
| picture-backend | NestJS | 3006 | ✅ Production Ready |
|
||||
| manadeck-backend | NestJS | 3009 | ⚠️ Needs Updates |
|
||||
|
||||
### Not Yet Containerized
|
||||
|
||||
**Web Apps (SvelteKit)**:
|
||||
- Templates available in `docker/templates/Dockerfile.sveltekit`
|
||||
- Need per-project Dockerfiles
|
||||
- SSR support included
|
||||
|
||||
**Landing Pages (Astro)**:
|
||||
- Templates available in `docker/templates/Dockerfile.astro`
|
||||
- Nginx configuration ready (`docker/nginx/astro.conf`)
|
||||
- Static site optimization included
|
||||
|
||||
**Mobile Apps (Expo/React Native)**:
|
||||
- Not containerized (not applicable for Hetzner deployment)
|
||||
- Built and deployed to app stores separately
|
||||
|
||||
---
|
||||
|
||||
## Critical Blocking Issues
|
||||
|
||||
### 1. ❌ Missing Prometheus Configuration
|
||||
|
||||
**Impact**: High - Blocks monitoring deployment
|
||||
**File**: `docker/prometheus/prometheus.yml`
|
||||
|
||||
**Issue**: Referenced in `docker-compose.yml` but file doesn't exist.
|
||||
|
||||
**Error**:
|
||||
```yaml
|
||||
# docker-compose.yml line ~150
|
||||
volumes:
|
||||
- ./docker/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml
|
||||
```
|
||||
|
||||
**Solution Required**:
|
||||
```bash
|
||||
mkdir -p docker/prometheus
|
||||
```
|
||||
|
||||
Create basic `prometheus.yml`:
|
||||
```yaml
|
||||
global:
|
||||
scrape_interval: 15s
|
||||
evaluation_interval: 15s
|
||||
|
||||
scrape_configs:
|
||||
- job_name: 'prometheus'
|
||||
static_configs:
|
||||
- targets: ['localhost:9090']
|
||||
|
||||
- job_name: 'node-exporter'
|
||||
static_configs:
|
||||
- targets: ['node-exporter:9100']
|
||||
|
||||
- job_name: 'postgres'
|
||||
static_configs:
|
||||
- targets: ['postgres:9187']
|
||||
|
||||
- job_name: 'redis'
|
||||
static_configs:
|
||||
- targets: ['redis:9121']
|
||||
```
|
||||
|
||||
### 2. ❌ Missing Grafana Provisioning
|
||||
|
||||
**Impact**: High - Blocks monitoring dashboard deployment
|
||||
**Directory**: `docker/grafana/provisioning/`
|
||||
|
||||
**Issue**: Referenced in docker-compose but directories don't exist:
|
||||
- `docker/grafana/provisioning/dashboards/`
|
||||
- `docker/grafana/provisioning/datasources/`
|
||||
|
||||
**Solution Required**:
|
||||
```bash
|
||||
mkdir -p docker/grafana/provisioning/{dashboards,datasources}
|
||||
```
|
||||
|
||||
Create `docker/grafana/provisioning/datasources/prometheus.yml`:
|
||||
```yaml
|
||||
apiVersion: 1
|
||||
|
||||
datasources:
|
||||
- name: Prometheus
|
||||
type: prometheus
|
||||
access: proxy
|
||||
url: http://prometheus:9090
|
||||
isDefault: true
|
||||
editable: true
|
||||
```
|
||||
|
||||
Create `docker/grafana/provisioning/dashboards/default.yml`:
|
||||
```yaml
|
||||
apiVersion: 1
|
||||
|
||||
providers:
|
||||
- name: 'Default'
|
||||
orgId: 1
|
||||
folder: ''
|
||||
type: file
|
||||
disableDeletion: false
|
||||
updateIntervalSeconds: 10
|
||||
allowUiUpdates: true
|
||||
options:
|
||||
path: /var/lib/grafana/dashboards
|
||||
```
|
||||
|
||||
### 3. ❌ Node Version Inconsistency
|
||||
|
||||
**Impact**: Medium - May cause runtime issues
|
||||
**File**: `apps/manadeck/apps/backend/Dockerfile`
|
||||
|
||||
**Issue**: ManaDeck uses Node 18 while all other services use Node 20.
|
||||
|
||||
**Current**:
|
||||
```dockerfile
|
||||
FROM node:18-alpine AS base
|
||||
```
|
||||
|
||||
**Should Be**:
|
||||
```dockerfile
|
||||
FROM node:20-alpine AS base
|
||||
```
|
||||
|
||||
**Location**: `apps/manadeck/apps/backend/Dockerfile:1`
|
||||
|
||||
### 4. ❌ ManaDeck Dockerfile Anomalies
|
||||
|
||||
**Impact**: Medium - Build inconsistency
|
||||
**File**: `apps/manadeck/apps/backend/Dockerfile`
|
||||
|
||||
**Issues**:
|
||||
1. Uses `npm` instead of `pnpm` (lines 15, 33, 38)
|
||||
2. Includes peer dependency workaround (`--legacy-peer-deps`)
|
||||
3. Cloud Run specific configuration (port 8080 instead of 3009)
|
||||
4. Missing proper workspace awareness
|
||||
|
||||
**Example Issue**:
|
||||
```dockerfile
|
||||
# Line 15 - Should use pnpm
|
||||
RUN npm ci --omit=dev --legacy-peer-deps
|
||||
```
|
||||
|
||||
**Solution**: Refactor to use pnpm like other services.
|
||||
|
||||
---
|
||||
|
||||
## Configuration Gaps
|
||||
|
||||
### 1. Missing Staging HTTPS/SSL Configuration
|
||||
|
||||
**Severity**: Medium
|
||||
|
||||
Staging environment (`docker-compose.staging.yml`) only has HTTP Nginx configuration. No SSL/TLS setup for testing HTTPS in staging.
|
||||
|
||||
**Recommendation**: Add Let's Encrypt staging certificates or self-signed certs.
|
||||
|
||||
### 2. Inconsistent Docker Compose at Service Level
|
||||
|
||||
**Severity**: Low
|
||||
|
||||
Only `chat` and `picture` have local `docker-compose.yml` files in their service directories. Other projects don't have service-specific compose files.
|
||||
|
||||
**Current**:
|
||||
```
|
||||
apps/chat/docker-compose.yml ✅ Exists
|
||||
apps/picture/docker-compose.yml ✅ Exists
|
||||
apps/manadeck/docker-compose.yml ❌ Missing
|
||||
apps/zitare/docker-compose.yml ❌ Missing
|
||||
apps/presi/docker-compose.yml ❌ Missing
|
||||
```
|
||||
|
||||
### 3. Database Initialization Unclear
|
||||
|
||||
**Severity**: Medium
|
||||
|
||||
Database initialization script (`docker/init-db/01-create-databases.sql`) exists, but unclear if it covers all services beyond mana-core-auth.
|
||||
|
||||
**Services Requiring Databases**:
|
||||
- mana-core-auth (PostgreSQL + Redis) ✅
|
||||
- chat-backend (PostgreSQL) ?
|
||||
- picture-backend (PostgreSQL) ?
|
||||
- manadeck-backend (Supabase external) N/A
|
||||
- zitare-backend (PostgreSQL) ?
|
||||
- presi-backend (PostgreSQL) ?
|
||||
|
||||
### 4. No Resource Limits in Development
|
||||
|
||||
**Severity**: Low
|
||||
|
||||
Development environment (`docker-compose.dev.yml`) has no resource limits, which can lead to runaway containers consuming all system resources.
|
||||
|
||||
**Recommendation**: Add development-appropriate limits (e.g., 2GB RAM per service).
|
||||
|
||||
### 5. Entrypoint Scripts Not Universal
|
||||
|
||||
**Severity**: Low
|
||||
|
||||
Not all services have entrypoint scripts for handling migrations, health checks, and graceful shutdown.
|
||||
|
||||
**Have Entrypoints**:
|
||||
- mana-core-auth ✅
|
||||
- chat-backend ✅
|
||||
- picture-backend ✅
|
||||
|
||||
**Missing Entrypoints**:
|
||||
- manadeck-backend ❌
|
||||
- zitare-backend ❌
|
||||
- presi-backend ❌
|
||||
|
||||
---
|
||||
|
||||
## Best Practices Currently Followed
|
||||
|
||||
### ✅ Multi-Stage Dockerfile Builds
|
||||
|
||||
All Dockerfiles use multi-stage builds with separate `build` and `production` stages:
|
||||
|
||||
```dockerfile
|
||||
FROM node:20-alpine AS base
|
||||
# ... setup
|
||||
|
||||
FROM base AS build
|
||||
# ... build artifacts
|
||||
|
||||
FROM node:20-alpine AS production
|
||||
# ... copy only necessary files
|
||||
```
|
||||
|
||||
**Benefit**: Smaller production images (~50% size reduction).
|
||||
|
||||
### ✅ Non-Root User Execution
|
||||
|
||||
All services run as non-root users:
|
||||
|
||||
```dockerfile
|
||||
RUN addgroup -g 1001 -S nodejs && \
|
||||
adduser -S nestjs -u 1001
|
||||
USER nestjs
|
||||
```
|
||||
|
||||
**Security Impact**: Prevents privilege escalation attacks.
|
||||
|
||||
### ✅ Alpine Base Images
|
||||
|
||||
Using Alpine Linux for minimal attack surface:
|
||||
|
||||
```dockerfile
|
||||
FROM node:20-alpine
|
||||
```
|
||||
|
||||
**Benefit**: ~40MB base image vs ~900MB for standard Node images.
|
||||
|
||||
### ✅ Health Checks on All Services
|
||||
|
||||
Comprehensive health checks with appropriate timeouts:
|
||||
|
||||
```yaml
|
||||
healthcheck:
|
||||
test: ["CMD", "wget", "--spider", "-q", "http://localhost:3000/health"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
start_period: 40s
|
||||
```
|
||||
|
||||
### ✅ Service Dependencies with Health Conditions
|
||||
|
||||
Proper dependency orchestration:
|
||||
|
||||
```yaml
|
||||
depends_on:
|
||||
postgres:
|
||||
condition: service_healthy
|
||||
redis:
|
||||
condition: service_healthy
|
||||
```
|
||||
|
||||
### ✅ Named Volumes for Data Persistence
|
||||
|
||||
Explicit volume naming for easy backup/restore:
|
||||
|
||||
```yaml
|
||||
volumes:
|
||||
postgres-data:
|
||||
driver: local
|
||||
name: manacore-postgres-data
|
||||
```
|
||||
|
||||
### ✅ Environment Variable Externalization
|
||||
|
||||
Secrets and configuration via environment files:
|
||||
|
||||
```yaml
|
||||
env_file:
|
||||
- .env.development
|
||||
- .env.production
|
||||
```
|
||||
|
||||
### ✅ Custom Bridge Networks
|
||||
|
||||
Service isolation with custom networks:
|
||||
|
||||
```yaml
|
||||
networks:
|
||||
manacore-network:
|
||||
driver: bridge
|
||||
name: manacore-network
|
||||
```
|
||||
|
||||
### ✅ Restart Policies
|
||||
|
||||
Appropriate restart policies per environment:
|
||||
|
||||
```yaml
|
||||
restart: unless-stopped # Staging/Production
|
||||
restart: on-failure # Development
|
||||
```
|
||||
|
||||
### ✅ Reverse Proxy with SSL
|
||||
|
||||
Traefik with automatic Let's Encrypt SSL:
|
||||
|
||||
```yaml
|
||||
command:
|
||||
- "--certificatesresolvers.letsencrypt.acme.httpchallenge=true"
|
||||
- "--certificatesresolvers.letsencrypt.acme.email=${ACME_EMAIL}"
|
||||
```
|
||||
|
||||
### ✅ Database Connection Pooling
|
||||
|
||||
PgBouncer integration for efficient connection management.
|
||||
|
||||
### ✅ Redis Caching Layer
|
||||
|
||||
Centralized caching with Redis for session management and performance.
|
||||
|
||||
### ✅ Docker Compose Profiles
|
||||
|
||||
Selective service startup with profiles:
|
||||
|
||||
```yaml
|
||||
services:
|
||||
mana-core-auth:
|
||||
profiles: ["auth", "all"]
|
||||
chat-backend:
|
||||
profiles: ["chat", "all"]
|
||||
```
|
||||
|
||||
### ✅ pnpm Workspace Awareness
|
||||
|
||||
Dockerfiles properly handle pnpm workspaces:
|
||||
|
||||
```dockerfile
|
||||
COPY pnpm-workspace.yaml package.json pnpm-lock.yaml ./
|
||||
RUN pnpm fetch
|
||||
RUN pnpm install --frozen-lockfile --offline
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Best Practice Gaps
|
||||
|
||||
### Missing: Docker Build Cache Optimization
|
||||
|
||||
**Issue**: No `.dockerignore` optimization strategy across services.
|
||||
|
||||
**Impact**: Slower builds, larger build contexts sent to Docker daemon.
|
||||
|
||||
**Recommendation**: Add comprehensive `.dockerignore` files per service.
|
||||
|
||||
### Missing: Multi-Architecture Build Support
|
||||
|
||||
**Issue**: No explicit multi-architecture builds (assumes AMD64 only).
|
||||
|
||||
**Impact**: M1/M2 Mac developers may face compatibility issues.
|
||||
|
||||
**Recommendation**: Use `docker buildx` for ARM64 + AMD64 builds.
|
||||
|
||||
### Missing: Container Security Scanning
|
||||
|
||||
**Issue**: No automated security scanning (Trivy, Hadolint, etc.).
|
||||
|
||||
**Impact**: Unknown vulnerabilities in production images.
|
||||
|
||||
**Recommendation**: Add CI/CD security scanning step.
|
||||
|
||||
### Missing: Consistent Logging
|
||||
|
||||
**Issue**: Logging configuration varies across environments.
|
||||
|
||||
**Recommendation**: Standardize JSON structured logging across all environments.
|
||||
|
||||
### Missing: Docker Deployment Documentation
|
||||
|
||||
**Issue**: No step-by-step Docker deployment guide.
|
||||
|
||||
**Impact**: Difficult onboarding for new developers.
|
||||
|
||||
**Recommendation**: Create `DOCKER_DEPLOYMENT.md` with runbooks.
|
||||
|
||||
---
|
||||
|
||||
## Environment Variable Handling
|
||||
|
||||
### Root-Level `.dockerignore` Excludes
|
||||
|
||||
```
|
||||
node_modules/
|
||||
dist/
|
||||
.git/
|
||||
.env*
|
||||
*.log
|
||||
coverage/
|
||||
```
|
||||
|
||||
**Status**: ✅ Properly configured
|
||||
|
||||
### Variable Management Strategy
|
||||
|
||||
**Three-Tier Hierarchy**:
|
||||
|
||||
1. **Root `.env.development`**: Shared development variables (committed)
|
||||
2. **Environment-specific** (`.env.production`): Secrets (gitignored)
|
||||
3. **Service-specific**: Per-service overrides in compose files
|
||||
|
||||
**Key Secrets Required**:
|
||||
- `POSTGRES_PASSWORD`
|
||||
- `REDIS_PASSWORD`
|
||||
- `JWT_PRIVATE_KEY`, `JWT_PUBLIC_KEY`
|
||||
- `AZURE_OPENAI_API_KEY`
|
||||
- `GOOGLE_GENAI_API_KEY`
|
||||
- `SUPABASE_SERVICE_ROLE_KEY`
|
||||
|
||||
---
|
||||
|
||||
## Network & Volume Strategy
|
||||
|
||||
### Networks
|
||||
|
||||
**Development**: `manacore-network` (bridge)
|
||||
**Staging**: `manacore-staging` (bridge)
|
||||
**Production**: `manacore-production` (bridge)
|
||||
|
||||
**Service-to-Service Communication**: Via Docker DNS
|
||||
- `postgres:5432`
|
||||
- `redis:6379`
|
||||
- `mana-core-auth:3001`
|
||||
|
||||
### Volumes
|
||||
|
||||
**Development**:
|
||||
```yaml
|
||||
volumes:
|
||||
postgres-data: {}
|
||||
redis-data: {}
|
||||
```
|
||||
|
||||
**Staging**:
|
||||
```yaml
|
||||
volumes:
|
||||
postgres_data:
|
||||
name: manacore-staging-postgres
|
||||
redis_data:
|
||||
name: manacore-staging-redis
|
||||
```
|
||||
|
||||
**Production**: No volumes (external services assumed)
|
||||
|
||||
**Full Stack**:
|
||||
```yaml
|
||||
volumes:
|
||||
postgres-data: {}
|
||||
redis-data: {}
|
||||
traefik-letsencrypt: {}
|
||||
prometheus-data: {}
|
||||
grafana-data: {}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Immediate Actions Required
|
||||
|
||||
### Priority 1: Critical Blockers (Must Fix Before Deployment)
|
||||
|
||||
1. **Create Prometheus Configuration**
|
||||
```bash
|
||||
mkdir -p docker/prometheus
|
||||
# Create prometheus.yml (see issue #1)
|
||||
```
|
||||
|
||||
2. **Create Grafana Provisioning**
|
||||
```bash
|
||||
mkdir -p docker/grafana/provisioning/{dashboards,datasources}
|
||||
# Create provisioning files (see issue #2)
|
||||
```
|
||||
|
||||
3. **Update ManaDeck Node Version**
|
||||
```bash
|
||||
# Edit apps/manadeck/apps/backend/Dockerfile
|
||||
# Change FROM node:18-alpine to node:20-alpine
|
||||
```
|
||||
|
||||
4. **Fix ManaDeck Dockerfile**
|
||||
```bash
|
||||
# Refactor to use pnpm instead of npm
|
||||
# Remove --legacy-peer-deps
|
||||
# Fix port configuration (3009 instead of 8080)
|
||||
```
|
||||
|
||||
### Priority 2: Configuration Improvements
|
||||
|
||||
5. **Add Staging SSL Configuration**
|
||||
- Add Let's Encrypt staging environment
|
||||
- Or configure self-signed certificates
|
||||
|
||||
6. **Standardize Service Compose Files**
|
||||
- Add `docker-compose.yml` to all projects
|
||||
- Follow chat/picture pattern
|
||||
|
||||
7. **Document Database Initialization**
|
||||
- Clarify which databases are created
|
||||
- Add initialization for all services
|
||||
|
||||
8. **Add Development Resource Limits**
|
||||
- Prevent runaway containers
|
||||
- Set reasonable limits (e.g., 2GB RAM)
|
||||
|
||||
9. **Add Entrypoint Scripts**
|
||||
- Create for manadeck, zitare, presi
|
||||
- Standardize migration handling
|
||||
|
||||
### Priority 3: Best Practice Enhancements
|
||||
|
||||
10. **Optimize Docker Build Cache**
|
||||
- Add comprehensive `.dockerignore` files
|
||||
- Optimize layer ordering
|
||||
|
||||
11. **Add Multi-Architecture Support**
|
||||
- Use `docker buildx`
|
||||
- Build for AMD64 + ARM64
|
||||
|
||||
12. **Implement Security Scanning**
|
||||
- Add Trivy to CI/CD
|
||||
- Scan images before push
|
||||
|
||||
13. **Standardize Logging**
|
||||
- JSON structured logging
|
||||
- Consistent across environments
|
||||
|
||||
14. **Create Deployment Documentation**
|
||||
- Step-by-step runbooks
|
||||
- Troubleshooting guides
|
||||
|
||||
---
|
||||
|
||||
## Estimated Time to Production Ready
|
||||
|
||||
| Phase | Tasks | Time Estimate |
|
||||
|-------|-------|---------------|
|
||||
| **Phase 1: Critical Fixes** | Issues #1-4 | 2-4 hours |
|
||||
| **Phase 2: Configuration** | Issues #5-9 | 4-6 hours |
|
||||
| **Phase 3: Best Practices** | Issues #10-14 | 6-8 hours |
|
||||
| **Total** | 14 tasks | **12-18 hours** |
|
||||
|
||||
---
|
||||
|
||||
## Conclusion
|
||||
|
||||
The Docker setup demonstrates **strong architectural foundations** with:
|
||||
- Multi-environment support ✅
|
||||
- Service isolation ✅
|
||||
- Health-driven orchestration ✅
|
||||
- Security best practices ✅
|
||||
|
||||
However, **4 critical blockers** prevent immediate production deployment to Hetzner. Addressing these issues should take **2-4 hours** and will unblock staging and production deployments.
|
||||
|
||||
**Recommendation**: Fix Priority 1 items immediately, then incrementally address Priority 2 and 3 for production hardening.
|
||||
|
||||
---
|
||||
|
||||
**Related Documentation**:
|
||||
- `HETZNER_PRODUCTION_GUIDE.md` - Comprehensive Hetzner deployment guide
|
||||
- `DOCKER_COMPOSE_PRODUCTION_ARCHITECTURE.md` - Detailed architecture design
|
||||
- `DOCKER_GUIDE.md` - Docker usage and best practices
|
||||
- `DEPLOYMENT_HETZNER.md` - Deployment options comparison
|
||||
|
|
@ -1,625 +0,0 @@
|
|||
# Hetzner Deployment Summary - Quick Reference
|
||||
|
||||
**Date**: 2025-12-01
|
||||
**Status**: Complete Analysis & Documentation
|
||||
**Action Required**: Fix 4 critical blockers before deployment
|
||||
|
||||
---
|
||||
|
||||
## Executive Summary
|
||||
|
||||
Your monorepo has **solid Docker foundations** but needs **4 critical fixes** (2-4 hours of work) before production deployment to Hetzner.
|
||||
|
||||
### Current State: ⚠️ Not Production Ready
|
||||
|
||||
**What's Working**:
|
||||
|
||||
- Multi-environment Docker Compose setups ✅
|
||||
- 4 containerized backends (auth, chat, picture, manadeck) ✅
|
||||
- Health checks and dependency management ✅
|
||||
- Security best practices (non-root, Alpine, network isolation) ✅
|
||||
|
||||
**What Needs Fixing**:
|
||||
|
||||
1. ❌ Missing Prometheus configuration (`docker/prometheus/prometheus.yml`)
|
||||
2. ❌ Missing Grafana provisioning (`docker/grafana/provisioning/`)
|
||||
3. ❌ ManaDeck uses Node 18 (should be Node 20)
|
||||
4. ❌ ManaDeck uses npm instead of pnpm
|
||||
|
||||
---
|
||||
|
||||
## Quick Start: Get Production Ready in 2-4 Hours
|
||||
|
||||
### Step 1: Fix Critical Blockers (1 hour)
|
||||
|
||||
```bash
|
||||
# 1. Create monitoring infrastructure
|
||||
mkdir -p docker/prometheus
|
||||
mkdir -p docker/grafana/provisioning/{dashboards,datasources}
|
||||
|
||||
# 2. Create Prometheus config
|
||||
cat > docker/prometheus/prometheus.yml <<'EOF'
|
||||
global:
|
||||
scrape_interval: 15s
|
||||
|
||||
scrape_configs:
|
||||
- job_name: 'prometheus'
|
||||
static_configs:
|
||||
- targets: ['localhost:9090']
|
||||
- job_name: 'docker'
|
||||
static_configs:
|
||||
- targets: ['172.17.0.1:9323']
|
||||
EOF
|
||||
|
||||
# 3. Create Grafana datasource
|
||||
cat > docker/grafana/provisioning/datasources/prometheus.yml <<'EOF'
|
||||
apiVersion: 1
|
||||
datasources:
|
||||
- name: Prometheus
|
||||
type: prometheus
|
||||
url: http://prometheus:9090
|
||||
isDefault: true
|
||||
EOF
|
||||
|
||||
# 4. Fix ManaDeck Dockerfile
|
||||
# Edit apps/manadeck/apps/backend/Dockerfile
|
||||
# - Change: FROM node:18-alpine → FROM node:20-alpine
|
||||
# - Replace all "npm" commands with "pnpm"
|
||||
# - Remove --legacy-peer-deps flag
|
||||
|
||||
# 5. Test locally
|
||||
pnpm docker:up
|
||||
```
|
||||
|
||||
### Step 2: Deploy to Hetzner (1-2 hours)
|
||||
|
||||
```bash
|
||||
# On Hetzner server (use "Docker CE" app during creation)
|
||||
|
||||
# 1. Run production setup script (see HETZNER_PRODUCTION_GUIDE.md)
|
||||
curl -o setup.sh https://your-repo/scripts/hetzner-setup.sh
|
||||
chmod +x setup.sh
|
||||
./setup.sh
|
||||
|
||||
# 2. Configure environment variables
|
||||
cd /app
|
||||
cp .env.production.example .env.production
|
||||
nano .env.production # Add your secrets
|
||||
|
||||
# 3. Deploy application
|
||||
docker compose -f docker-compose.production.yml up -d
|
||||
|
||||
# 4. Verify health
|
||||
curl http://localhost:3001/api/v1/health # mana-core-auth
|
||||
curl http://localhost:3002/api/health # chat-backend
|
||||
```
|
||||
|
||||
### Step 3: Setup Monitoring & Backups (1 hour)
|
||||
|
||||
```bash
|
||||
# Deploy monitoring stack
|
||||
docker compose -f docker-compose.monitoring.yml up -d
|
||||
|
||||
# Setup automated backups
|
||||
apt install borgbackup
|
||||
./scripts/setup-backups.sh
|
||||
|
||||
# Configure backup cron (daily at 2 AM)
|
||||
echo "0 2 * * * /usr/local/bin/docker-backup.sh" | crontab -
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Recommended Hetzner Setup
|
||||
|
||||
### For Your Monorepo Size (10 backends, 10 web apps)
|
||||
|
||||
**Option 1: Single Server (Development/Staging)** - €28/month
|
||||
|
||||
```
|
||||
Server: Hetzner CX33 (4 vCPU, 8GB RAM)
|
||||
- All services on one server
|
||||
- Good for staging environment
|
||||
- ~5-7 concurrent services
|
||||
```
|
||||
|
||||
**Option 2: Production HA Setup** - €37/month
|
||||
|
||||
```
|
||||
2x Hetzner CPX21 (3 vCPU, 4GB RAM) - €14/month
|
||||
+ Load Balancer - €5.39/month
|
||||
+ Volumes (3x 50GB) - €7.50/month
|
||||
+ Storage Box (500GB) - €10.11/month
|
||||
```
|
||||
|
||||
**Option 3: Full Monorepo (All Services)** - €166/month
|
||||
|
||||
```
|
||||
3x App Servers (CX33) - €84/month
|
||||
1x DB Server (CX31) - €28/month
|
||||
Load Balancer - €10/month
|
||||
Volumes + Storage Box - €44/month
|
||||
|
||||
vs AWS equivalent: $400-600/month
|
||||
Savings: 60-75%
|
||||
```
|
||||
|
||||
**Recommendation**: Start with Option 1 (staging), scale to Option 2 (production)
|
||||
|
||||
---
|
||||
|
||||
## Cost Breakdown: What You'll Pay Monthly
|
||||
|
||||
### Minimal Production (5 services)
|
||||
|
||||
```
|
||||
Server (CPX21): €7.00/month
|
||||
Volume (50GB): €2.50/month
|
||||
Storage Box (100GB): €3.81/month
|
||||
─────────────────────────────────────────
|
||||
Total: €13.81/month
|
||||
```
|
||||
|
||||
### Your Current Setup (Full Monorepo)
|
||||
|
||||
```
|
||||
3x Servers (CX33): €84.00/month
|
||||
1x Database Server: €28.00/month
|
||||
Load Balancer: €10.00/month
|
||||
Volumes (5x 100GB): €25.00/month
|
||||
Storage Box (1TB): €19.00/month
|
||||
─────────────────────────────────────────
|
||||
Total: €166.00/month
|
||||
```
|
||||
|
||||
**vs AWS/GCP**: Saves 60-75% on infrastructure costs
|
||||
|
||||
---
|
||||
|
||||
## Architecture Overview
|
||||
|
||||
### Network Isolation (3-Tier)
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────┐
|
||||
│ FRONTEND NETWORK │
|
||||
│ - Traefik (reverse proxy) │
|
||||
│ - Web apps (SvelteKit) │
|
||||
│ - Landing pages (Astro) │
|
||||
└─────────────────┬───────────────────────┘
|
||||
│
|
||||
┌─────────────────▼───────────────────────┐
|
||||
│ BACKEND NETWORK │
|
||||
│ - NestJS backends │
|
||||
│ - mana-core-auth │
|
||||
│ - API services │
|
||||
└─────────────────┬───────────────────────┘
|
||||
│
|
||||
┌─────────────────▼───────────────────────┐
|
||||
│ DATABASE NETWORK (Internal) │
|
||||
│ - PostgreSQL │
|
||||
│ - Redis │
|
||||
│ - No internet access │
|
||||
└─────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
### Service Dependency Flow
|
||||
|
||||
```
|
||||
PostgreSQL + Redis
|
||||
↓
|
||||
mana-core-auth (Central Authentication)
|
||||
↓
|
||||
Backend Services (chat, picture, zitare, presi, manadeck)
|
||||
↓
|
||||
Web Apps (SvelteKit)
|
||||
↓
|
||||
Landing Pages (Astro)
|
||||
↓
|
||||
Traefik (SSL + Reverse Proxy)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Key Files & Locations
|
||||
|
||||
### Documentation (Created Today)
|
||||
|
||||
- `docs/DOCKER_SETUP_ANALYSIS.md` - Complete current state analysis
|
||||
- `docs/HETZNER_PRODUCTION_GUIDE.md` - Comprehensive deployment guide
|
||||
- `docs/HETZNER_DEPLOYMENT_SUMMARY.md` - This quick reference
|
||||
|
||||
### Existing Documentation
|
||||
|
||||
- `docs/DEPLOYMENT_HETZNER.md` - Deployment options comparison (German)
|
||||
- `docs/DOCKER_GUIDE.md` - Docker usage guide
|
||||
- `docs/DEPLOYMENT_ARCHITECTURE.md` - Architecture details
|
||||
|
||||
### Docker Configuration Files
|
||||
|
||||
- `docker-compose.yml` - Full stack with monitoring
|
||||
- `docker-compose.dev.yml` - Development environment
|
||||
- `docker-compose.staging.yml` - Staging deployment
|
||||
- `docker-compose.production.yml` - Production deployment
|
||||
|
||||
### Docker Templates
|
||||
|
||||
- `docker/templates/Dockerfile.nestjs` - NestJS backend template
|
||||
- `docker/templates/Dockerfile.sveltekit` - SvelteKit web template
|
||||
- `docker/templates/Dockerfile.astro` - Astro landing page template
|
||||
|
||||
### Active Service Dockerfiles
|
||||
|
||||
- `services/mana-core-auth/Dockerfile` ✅
|
||||
- `apps/chat/apps/backend/Dockerfile` ✅
|
||||
- `apps/picture/apps/backend/Dockerfile` ✅
|
||||
- `apps/manadeck/apps/backend/Dockerfile` ⚠️ Needs fixes
|
||||
|
||||
---
|
||||
|
||||
## Security Checklist
|
||||
|
||||
### Critical Security Items
|
||||
|
||||
- [ ] **SSH Configuration**
|
||||
- Disable root login
|
||||
- Disable password authentication
|
||||
- SSH keys only
|
||||
|
||||
- [ ] **Firewall Setup**
|
||||
- Hetzner Cloud Firewall (primary layer)
|
||||
- UFW on server (secondary layer)
|
||||
- Allow only ports 22, 80, 443
|
||||
|
||||
- [ ] **Docker Security**
|
||||
- Non-root containers
|
||||
- Docker secrets for production
|
||||
- Read-only filesystems where possible
|
||||
- Security updates automated
|
||||
|
||||
- [ ] **Backup Strategy**
|
||||
- Automated daily backups with Borg
|
||||
- 7 daily, 4 weekly, 6 monthly retention
|
||||
- Test restore procedure
|
||||
|
||||
---
|
||||
|
||||
## Monitoring Stack Components
|
||||
|
||||
### What You Get
|
||||
|
||||
**Metrics Collection**:
|
||||
|
||||
- Prometheus - Time-series metrics database
|
||||
- cAdvisor - Container resource usage
|
||||
- Node Exporter - Host system metrics
|
||||
|
||||
**Visualization**:
|
||||
|
||||
- Grafana - Dashboards and alerts
|
||||
- Pre-built dashboards for Docker, PostgreSQL, Redis
|
||||
|
||||
**Logging**:
|
||||
|
||||
- Loki - Log aggregation
|
||||
- Promtail - Log collection from containers
|
||||
|
||||
**Access**:
|
||||
|
||||
- Grafana UI: `http://your-server:3000`
|
||||
- Prometheus UI: `http://your-server:9090`
|
||||
|
||||
---
|
||||
|
||||
## CI/CD Integration
|
||||
|
||||
### GitHub Actions Workflow (Recommended)
|
||||
|
||||
```yaml
|
||||
# .github/workflows/deploy-hetzner.yml
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
|
||||
jobs:
|
||||
deploy:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
# Build and push to GitHub Container Registry
|
||||
- name: Build and push
|
||||
run: |
|
||||
docker build -t ghcr.io/your-org/service:latest .
|
||||
docker push ghcr.io/your-org/service:latest
|
||||
|
||||
# Deploy to Hetzner via SSH
|
||||
- name: Deploy
|
||||
uses: appleboy/ssh-action@master
|
||||
with:
|
||||
host: ${{ secrets.HETZNER_HOST }}
|
||||
username: deploy
|
||||
key: ${{ secrets.SSH_PRIVATE_KEY }}
|
||||
script: |
|
||||
cd /app
|
||||
docker compose pull
|
||||
docker compose up -d --remove-orphans
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Common Commands
|
||||
|
||||
### Local Development
|
||||
|
||||
```bash
|
||||
# Start all services
|
||||
pnpm docker:up
|
||||
|
||||
# Start specific project
|
||||
docker compose --profile chat up -d
|
||||
|
||||
# View logs
|
||||
docker compose logs -f chat-backend
|
||||
|
||||
# Stop everything
|
||||
docker compose down
|
||||
```
|
||||
|
||||
### Production Deployment
|
||||
|
||||
```bash
|
||||
# Deploy to production
|
||||
docker compose -f docker-compose.production.yml up -d
|
||||
|
||||
# Check service health
|
||||
docker compose ps
|
||||
|
||||
# View logs
|
||||
docker compose logs -f --tail=100
|
||||
|
||||
# Restart single service
|
||||
docker compose restart chat-backend
|
||||
|
||||
# Update single service (zero downtime)
|
||||
docker compose up -d --no-deps chat-backend
|
||||
```
|
||||
|
||||
### Monitoring
|
||||
|
||||
```bash
|
||||
# Check resource usage
|
||||
docker stats
|
||||
|
||||
# View container health
|
||||
docker inspect --format='{{.State.Health.Status}}' container-name
|
||||
|
||||
# Access Prometheus
|
||||
http://localhost:9090
|
||||
|
||||
# Access Grafana
|
||||
http://localhost:3000
|
||||
```
|
||||
|
||||
### Backup & Restore
|
||||
|
||||
```bash
|
||||
# Manual backup
|
||||
/usr/local/bin/docker-backup.sh
|
||||
|
||||
# List backups
|
||||
borg list ssh://u123456@u123456.your-storagebox.de:23/./backups
|
||||
|
||||
# Restore from backup
|
||||
borg extract ssh://u123456@u123456.your-storagebox.de:23/./backups::20251201-020000
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Troubleshooting Quick Reference
|
||||
|
||||
### Container Won't Start
|
||||
|
||||
```bash
|
||||
# View logs
|
||||
docker logs container-name
|
||||
|
||||
# Check exit code
|
||||
docker inspect --format='{{.State.ExitCode}}' container-name
|
||||
|
||||
# Run interactively
|
||||
docker run -it --rm image-name sh
|
||||
```
|
||||
|
||||
### High Resource Usage
|
||||
|
||||
```bash
|
||||
# Check stats
|
||||
docker stats
|
||||
|
||||
# Check disk usage
|
||||
docker system df
|
||||
|
||||
# Clean up
|
||||
docker system prune -a
|
||||
```
|
||||
|
||||
### Network Issues
|
||||
|
||||
```bash
|
||||
# Test connectivity
|
||||
docker exec container1 ping container2
|
||||
|
||||
# Check network
|
||||
docker network inspect manacore-network
|
||||
|
||||
# Restart Docker
|
||||
systemctl restart docker
|
||||
```
|
||||
|
||||
### Health Check Failing
|
||||
|
||||
```bash
|
||||
# Check health status
|
||||
docker inspect --format='{{.State.Health}}' container-name
|
||||
|
||||
# View health logs
|
||||
docker inspect --format='{{range .State.Health.Log}}{{.Output}}{{end}}' container-name
|
||||
|
||||
# Test health endpoint manually
|
||||
curl http://localhost:3000/health
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Next Steps: Priority Order
|
||||
|
||||
### Immediate (Today - 2 hours)
|
||||
|
||||
1. **Fix Critical Blockers** (See Step 1 above)
|
||||
- Create monitoring configs
|
||||
- Fix ManaDeck Dockerfile
|
||||
|
||||
2. **Test Locally**
|
||||
```bash
|
||||
pnpm docker:up
|
||||
docker compose ps # All should be healthy
|
||||
```
|
||||
|
||||
### Short Term (This Week - 4 hours)
|
||||
|
||||
3. **Provision Hetzner Server**
|
||||
- Choose server type (CX33 recommended for start)
|
||||
- Select "Docker CE" app during creation
|
||||
- Configure private network
|
||||
|
||||
4. **Initial Deployment**
|
||||
- Run production setup script
|
||||
- Deploy application
|
||||
- Configure monitoring
|
||||
|
||||
5. **Setup Backups**
|
||||
- Configure Storage Box
|
||||
- Initialize Borg repository
|
||||
- Test restore procedure
|
||||
|
||||
### Medium Term (Next Week - 8 hours)
|
||||
|
||||
6. **CI/CD Pipeline**
|
||||
- Setup GitHub Actions workflow
|
||||
- Configure secrets
|
||||
- Test automated deployment
|
||||
|
||||
7. **Security Hardening**
|
||||
- Configure Hetzner Cloud Firewall
|
||||
- Setup fail2ban
|
||||
- Enable automatic security updates
|
||||
|
||||
8. **Load Testing**
|
||||
- Test with expected load
|
||||
- Tune resource limits
|
||||
- Optimize performance
|
||||
|
||||
### Long Term (Ongoing)
|
||||
|
||||
9. **Documentation**
|
||||
- Create runbooks for common tasks
|
||||
- Document incident response
|
||||
- Team training
|
||||
|
||||
10. **Optimization**
|
||||
- Monitor costs
|
||||
- Right-size resources
|
||||
- Implement auto-scaling if needed
|
||||
|
||||
---
|
||||
|
||||
## Success Metrics
|
||||
|
||||
### How to Know You're Production Ready
|
||||
|
||||
✅ **Infrastructure**
|
||||
|
||||
- [ ] Server accessible via SSH with key authentication
|
||||
- [ ] Docker and docker-compose installed and working
|
||||
- [ ] Firewall configured (Hetzner + UFW)
|
||||
- [ ] Private network configured (if multi-server)
|
||||
|
||||
✅ **Application**
|
||||
|
||||
- [ ] All services start and pass health checks
|
||||
- [ ] Environment variables properly configured
|
||||
- [ ] SSL/TLS working (Let's Encrypt)
|
||||
- [ ] Database migrations run successfully
|
||||
|
||||
✅ **Monitoring**
|
||||
|
||||
- [ ] Prometheus collecting metrics
|
||||
- [ ] Grafana dashboards accessible
|
||||
- [ ] Alerts configured and tested
|
||||
- [ ] Logs centralized in Loki
|
||||
|
||||
✅ **Backups**
|
||||
|
||||
- [ ] Automated daily backups running
|
||||
- [ ] Storage Box configured
|
||||
- [ ] Restore procedure tested
|
||||
- [ ] Retention policy configured
|
||||
|
||||
✅ **CI/CD**
|
||||
|
||||
- [ ] GitHub Actions workflow working
|
||||
- [ ] Automated deployments successful
|
||||
- [ ] Rollback procedure tested
|
||||
|
||||
---
|
||||
|
||||
## Getting Help
|
||||
|
||||
### Documentation References
|
||||
|
||||
- **Current State**: `docs/DOCKER_SETUP_ANALYSIS.md`
|
||||
- **Complete Guide**: `docs/HETZNER_PRODUCTION_GUIDE.md`
|
||||
- **Docker Usage**: `docs/DOCKER_GUIDE.md`
|
||||
- **Options Comparison**: `docs/DEPLOYMENT_HETZNER.md`
|
||||
|
||||
### External Resources
|
||||
|
||||
- [Hetzner Cloud Docs](https://docs.hetzner.com/cloud/)
|
||||
- [Docker Compose Reference](https://docs.docker.com/compose/)
|
||||
- [Traefik Documentation](https://doc.traefik.io/traefik/)
|
||||
- [Prometheus Documentation](https://prometheus.io/docs/)
|
||||
|
||||
### Support Channels
|
||||
|
||||
- Hetzner Support: https://console.hetzner.cloud/
|
||||
- Docker Community: https://forums.docker.com/
|
||||
- Your Team Documentation: `docs/` directory
|
||||
|
||||
---
|
||||
|
||||
## Summary
|
||||
|
||||
You have:
|
||||
|
||||
- ✅ **Solid foundation** with multi-environment Docker setup
|
||||
- ✅ **4 containerized services** ready to deploy
|
||||
- ✅ **Complete documentation** for production deployment
|
||||
- ⚠️ **4 critical fixes** needed (2-4 hours of work)
|
||||
|
||||
After fixes:
|
||||
|
||||
- 🚀 **2-4 hours** to deploy to Hetzner
|
||||
- 💰 **€14-166/month** depending on scale (60-75% cheaper than AWS)
|
||||
- 📊 **Complete monitoring** with Prometheus + Grafana
|
||||
- 🔒 **Production-grade security** with firewalls and automated backups
|
||||
- 🔄 **Automated deployments** with GitHub Actions
|
||||
|
||||
**Total time to production**: ~10-15 hours from current state
|
||||
|
||||
---
|
||||
|
||||
**Document Version**: 1.0
|
||||
**Last Updated**: 2025-12-01
|
||||
**Next Review**: After first deployment
|
||||
File diff suppressed because it is too large
Load diff
|
|
@ -1,296 +0,0 @@
|
|||
# Production Launch Guide - mana.how
|
||||
|
||||
Diese Anleitung beschreibt alle Schritte um die Staging-Umgebung zur Production zu machen.
|
||||
|
||||
**Server:** 46.224.108.214 (Hetzner)
|
||||
**Domain:** mana.how
|
||||
|
||||
---
|
||||
|
||||
## Schritt 1: DNS-Einträge anlegen
|
||||
|
||||
Bei eurem DNS-Provider (wo `mana.how` registriert ist) folgende A-Records anlegen:
|
||||
|
||||
### Erforderliche DNS-Einträge
|
||||
|
||||
| Subdomain | Typ | Ziel | TTL |
|
||||
|-----------|-----|------|-----|
|
||||
| `@` (root) | A | 46.224.108.214 | 300 |
|
||||
| `www` | A | 46.224.108.214 | 300 |
|
||||
| `auth` | A | 46.224.108.214 | 300 |
|
||||
| `chat` | A | 46.224.108.214 | 300 |
|
||||
| `chat-api` | A | 46.224.108.214 | 300 |
|
||||
| `todo` | A | 46.224.108.214 | 300 |
|
||||
| `todo-api` | A | 46.224.108.214 | 300 |
|
||||
| `calendar` | A | 46.224.108.214 | 300 |
|
||||
| `calendar-api` | A | 46.224.108.214 | 300 |
|
||||
| `clock` | A | 46.224.108.214 | 300 |
|
||||
| `clock-api` | A | 46.224.108.214 | 300 |
|
||||
|
||||
**Alternative mit Wildcard:**
|
||||
| Subdomain | Typ | Ziel | TTL |
|
||||
|-----------|-----|------|-----|
|
||||
| `@` (root) | A | 46.224.108.214 | 300 |
|
||||
| `*` | A | 46.224.108.214 | 300 |
|
||||
|
||||
> **Hinweis:** Nach dem Anlegen kann es bis zu 24h dauern bis die DNS-Einträge weltweit propagiert sind. In der Praxis meist schneller.
|
||||
|
||||
### DNS prüfen
|
||||
|
||||
```bash
|
||||
# Prüfen ob DNS korrekt ist
|
||||
dig mana.how +short
|
||||
dig auth.mana.how +short
|
||||
dig chat.mana.how +short
|
||||
# Sollte jeweils 46.224.108.214 zurückgeben
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Schritt 2: Server vorbereiten
|
||||
|
||||
SSH auf den Server:
|
||||
|
||||
```bash
|
||||
ssh -i ~/.ssh/hetzner_deploy_key deploy@46.224.108.214
|
||||
```
|
||||
|
||||
### 2.1 Backup der aktuellen Staging-Daten (optional aber empfohlen)
|
||||
|
||||
```bash
|
||||
cd ~/manacore-staging
|
||||
|
||||
# Datenbank-Backup erstellen
|
||||
docker compose exec -T postgres pg_dumpall -U postgres > ~/backup_$(date +%Y%m%d_%H%M%S).sql
|
||||
|
||||
echo "Backup erstellt: ~/backup_*.sql"
|
||||
```
|
||||
|
||||
### 2.2 Staging Container stoppen
|
||||
|
||||
```bash
|
||||
cd ~/manacore-staging
|
||||
docker compose down
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Schritt 3: Production Konfiguration deployen
|
||||
|
||||
### 3.1 Verzeichnis umbenennen (optional)
|
||||
|
||||
```bash
|
||||
# Von staging zu production umbenennen
|
||||
mv ~/manacore-staging ~/manacore-production
|
||||
cd ~/manacore-production
|
||||
```
|
||||
|
||||
### 3.2 Production docker-compose kopieren
|
||||
|
||||
Vom lokalen Rechner:
|
||||
|
||||
```bash
|
||||
# Aus dem Repo-Root
|
||||
scp -i ~/.ssh/hetzner_deploy_key \
|
||||
docker-compose.production.yml \
|
||||
deploy@46.224.108.214:~/manacore-production/docker-compose.yml
|
||||
```
|
||||
|
||||
### 3.3 Production Caddyfile kopieren
|
||||
|
||||
```bash
|
||||
scp -i ~/.ssh/hetzner_deploy_key \
|
||||
docker/caddy/Caddyfile.production \
|
||||
deploy@46.224.108.214:~/Caddyfile
|
||||
```
|
||||
|
||||
### 3.4 Caddy neu laden
|
||||
|
||||
Auf dem Server:
|
||||
|
||||
```bash
|
||||
# Caddy Config neu laden
|
||||
docker exec caddy caddy reload --config /etc/caddy/Caddyfile
|
||||
|
||||
# Prüfen ob Caddy läuft
|
||||
docker logs caddy --tail 20
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Schritt 4: Environment Variables anpassen
|
||||
|
||||
Auf dem Server die `.env` Datei anpassen:
|
||||
|
||||
```bash
|
||||
cd ~/manacore-production
|
||||
nano .env
|
||||
```
|
||||
|
||||
Die bestehenden Staging-Werte können bleiben. Nur sicherstellen dass:
|
||||
|
||||
```env
|
||||
NODE_ENV=production
|
||||
|
||||
# Diese Werte bleiben gleich (Staging Secrets weiterverwenden):
|
||||
POSTGRES_PASSWORD=<behalten>
|
||||
REDIS_PASSWORD=<behalten>
|
||||
JWT_SECRET=<behalten>
|
||||
JWT_PUBLIC_KEY=<behalten>
|
||||
JWT_PRIVATE_KEY=<behalten>
|
||||
SUPABASE_URL=<behalten>
|
||||
SUPABASE_ANON_KEY=<behalten>
|
||||
SUPABASE_SERVICE_ROLE_KEY=<behalten>
|
||||
AZURE_OPENAI_ENDPOINT=<behalten>
|
||||
AZURE_OPENAI_API_KEY=<behalten>
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Schritt 5: Container starten
|
||||
|
||||
```bash
|
||||
cd ~/manacore-production
|
||||
|
||||
# Images pullen
|
||||
docker compose pull
|
||||
|
||||
# Container starten
|
||||
docker compose up -d
|
||||
|
||||
# Status prüfen
|
||||
docker compose ps
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Schritt 6: Health Checks
|
||||
|
||||
```bash
|
||||
# Alle Services prüfen
|
||||
curl -s http://localhost:3001/api/v1/health # Auth
|
||||
curl -s http://localhost:5173/health # Dashboard
|
||||
curl -s http://localhost:3000/health # Chat Web
|
||||
curl -s http://localhost:3002/api/v1/health # Chat API
|
||||
curl -s http://localhost:5188/health # Todo Web
|
||||
curl -s http://localhost:3018/api/health # Todo API
|
||||
curl -s http://localhost:5186/health # Calendar Web
|
||||
curl -s http://localhost:3016/api/v1/health # Calendar API
|
||||
curl -s http://localhost:5187/health # Clock Web
|
||||
curl -s http://localhost:3017/api/v1/health # Clock API
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Schritt 7: SSL-Zertifikate (automatisch)
|
||||
|
||||
Caddy holt sich automatisch Let's Encrypt Zertifikate sobald die DNS-Einträge korrekt sind.
|
||||
|
||||
Prüfen:
|
||||
|
||||
```bash
|
||||
# Logs prüfen auf Certificate-Meldungen
|
||||
docker logs caddy 2>&1 | grep -i "certificate\|tls"
|
||||
|
||||
# Oder direkt testen
|
||||
curl -I https://mana.how
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Schritt 8: Finale Tests
|
||||
|
||||
Im Browser testen:
|
||||
|
||||
| URL | Erwartet |
|
||||
|-----|----------|
|
||||
| https://mana.how | Dashboard Login |
|
||||
| https://auth.mana.how/api/v1/health | `{"status":"ok"}` |
|
||||
| https://chat.mana.how | Chat App Login |
|
||||
| https://todo.mana.how | Todo App Login |
|
||||
| https://calendar.mana.how | Calendar App Login |
|
||||
| https://clock.mana.how | Clock App Login |
|
||||
|
||||
---
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Container startet nicht
|
||||
|
||||
```bash
|
||||
# Logs anschauen
|
||||
docker compose logs <service-name>
|
||||
|
||||
# Beispiel
|
||||
docker compose logs mana-core-auth
|
||||
docker compose logs chat-backend
|
||||
```
|
||||
|
||||
### DNS nicht propagiert
|
||||
|
||||
```bash
|
||||
# Verschiedene DNS-Server testen
|
||||
dig @8.8.8.8 mana.how +short # Google DNS
|
||||
dig @1.1.1.1 mana.how +short # Cloudflare DNS
|
||||
```
|
||||
|
||||
### SSL-Zertifikat Fehler
|
||||
|
||||
```bash
|
||||
# Caddy Logs prüfen
|
||||
docker logs caddy --tail 100
|
||||
|
||||
# Caddy neu starten
|
||||
docker restart caddy
|
||||
```
|
||||
|
||||
### Datenbank Verbindungsfehler
|
||||
|
||||
```bash
|
||||
# Postgres prüfen
|
||||
docker compose exec postgres psql -U postgres -l
|
||||
|
||||
# Datenbanken anzeigen
|
||||
docker compose exec postgres psql -U postgres -c "\l"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Rollback zu Staging
|
||||
|
||||
Falls etwas schief geht:
|
||||
|
||||
```bash
|
||||
cd ~/manacore-production
|
||||
docker compose down
|
||||
|
||||
# Alte Staging docker-compose wiederherstellen
|
||||
# (müsste vorher gesichert werden)
|
||||
|
||||
# Caddyfile zurück auf staging
|
||||
scp -i ~/.ssh/hetzner_deploy_key \
|
||||
docker/caddy/Caddyfile.staging \
|
||||
deploy@46.224.108.214:~/Caddyfile
|
||||
|
||||
docker exec caddy caddy reload --config /etc/caddy/Caddyfile
|
||||
docker compose up -d
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Zusammenfassung der URLs
|
||||
|
||||
Nach erfolgreichem Launch:
|
||||
|
||||
| App | URL |
|
||||
|-----|-----|
|
||||
| **Dashboard** | https://mana.how |
|
||||
| **Auth API** | https://auth.mana.how |
|
||||
| **Chat** | https://chat.mana.how |
|
||||
| **Chat API** | https://chat-api.mana.how |
|
||||
| **Todo** | https://todo.mana.how |
|
||||
| **Todo API** | https://todo-api.mana.how |
|
||||
| **Calendar** | https://calendar.mana.how |
|
||||
| **Calendar API** | https://calendar-api.mana.how |
|
||||
| **Clock** | https://clock.mana.how |
|
||||
| **Clock API** | https://clock-api.mana.how |
|
||||
|
|
@ -1,408 +0,0 @@
|
|||
# Staging Deployment Issues & Solutions
|
||||
|
||||
This document captures common issues encountered during staging deployments and their solutions. Reference this when debugging deployment problems.
|
||||
|
||||
## Table of Contents
|
||||
|
||||
1. [Runtime Environment Variables (SvelteKit)](#1-runtime-environment-variables-sveltekit)
|
||||
2. [CORS Configuration](#2-cors-configuration)
|
||||
3. [CD Workflow Version Tags](#3-cd-workflow-version-tags)
|
||||
4. [Database Setup](#4-database-setup)
|
||||
5. [User ID Format (Better Auth)](#5-user-id-format-better-auth)
|
||||
6. [Debugging Checklist](#6-debugging-checklist)
|
||||
7. [Summary: Common Mistakes to Avoid](#summary-common-mistakes-to-avoid)
|
||||
|
||||
---
|
||||
|
||||
## 1. Runtime Environment Variables (SvelteKit)
|
||||
|
||||
### Problem
|
||||
|
||||
SvelteKit apps use `import.meta.env.PUBLIC_*` which gets **baked in at build time**. When running in Docker, the container uses whatever values were present during the GitHub Actions build, not the runtime environment variables.
|
||||
|
||||
**Symptoms:**
|
||||
- Web apps calling `localhost:3001` instead of staging server IP
|
||||
- API calls going to wrong URLs despite correct Docker env vars
|
||||
|
||||
### Solution
|
||||
|
||||
Use **runtime env injection** via `hooks.server.ts`:
|
||||
|
||||
```typescript
|
||||
// src/hooks.server.ts
|
||||
import type { Handle } from '@sveltejs/kit';
|
||||
|
||||
const PUBLIC_MANA_CORE_AUTH_URL_CLIENT =
|
||||
process.env.PUBLIC_MANA_CORE_AUTH_URL_CLIENT || '';
|
||||
const PUBLIC_BACKEND_URL_CLIENT =
|
||||
process.env.PUBLIC_BACKEND_URL_CLIENT || '';
|
||||
|
||||
export const handle: Handle = async ({ event, resolve }) => {
|
||||
return resolve(event, {
|
||||
transformPageChunk: ({ html }) => {
|
||||
const envScript = `<script>
|
||||
window.__PUBLIC_MANA_CORE_AUTH_URL__ = "${PUBLIC_MANA_CORE_AUTH_URL_CLIENT}";
|
||||
window.__PUBLIC_BACKEND_URL__ = "${PUBLIC_BACKEND_URL_CLIENT}";
|
||||
</script>`;
|
||||
return html.replace('<head>', `<head>${envScript}`);
|
||||
},
|
||||
});
|
||||
};
|
||||
```
|
||||
|
||||
Then in client code, read from `window` instead of `import.meta.env`:
|
||||
|
||||
```typescript
|
||||
import { browser } from '$app/environment';
|
||||
|
||||
function getApiUrl(): string {
|
||||
if (browser && typeof window !== 'undefined') {
|
||||
const injectedUrl = (window as any).__PUBLIC_BACKEND_URL__;
|
||||
if (injectedUrl) return injectedUrl;
|
||||
}
|
||||
return 'http://localhost:3000'; // fallback for local dev
|
||||
}
|
||||
```
|
||||
|
||||
### Lazy Client Initialization Pattern
|
||||
|
||||
**Important**: API clients must be lazily initialized to read the URL at request time, not at module load time:
|
||||
|
||||
```typescript
|
||||
// CORRECT - Lazy initialization
|
||||
let _client: ReturnType<typeof createApiClient> | null = null;
|
||||
|
||||
function getClient() {
|
||||
if (!_client) {
|
||||
_client = createApiClient(getApiUrl()); // URL evaluated when called
|
||||
}
|
||||
return _client;
|
||||
}
|
||||
|
||||
export async function getTasks() {
|
||||
return getClient().get('/tasks'); // Client created on first use
|
||||
}
|
||||
```
|
||||
|
||||
```typescript
|
||||
// WRONG - Module-level initialization
|
||||
const client = createApiClient(getApiUrl()); // URL evaluated at import time!
|
||||
|
||||
export async function getTasks() {
|
||||
return client.get('/tasks'); // Will use stale URL
|
||||
}
|
||||
```
|
||||
|
||||
**Why this matters**: When the module is imported, the `window` object may not have the injected environment variables yet. The lazy pattern ensures the URL is read only when the client is actually needed.
|
||||
|
||||
### Docker Compose Pattern
|
||||
|
||||
Use two environment variables:
|
||||
- `PUBLIC_*_URL` - Internal Docker network URL (container-to-container)
|
||||
- `PUBLIC_*_URL_CLIENT` - External URL for browser access
|
||||
|
||||
```yaml
|
||||
environment:
|
||||
PUBLIC_BACKEND_URL: http://backend-container:3000 # Server-side
|
||||
PUBLIC_BACKEND_URL_CLIENT: http://46.224.108.214:3000 # Browser-side
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 2. CORS Configuration
|
||||
|
||||
### Problem
|
||||
|
||||
Backends only allow CORS from their own web apps, blocking requests from other origins like manacore-web dashboard.
|
||||
|
||||
**Symptoms:**
|
||||
- `Access to fetch blocked by CORS policy`
|
||||
- `No 'Access-Control-Allow-Origin' header`
|
||||
|
||||
### Solution
|
||||
|
||||
Add all necessary origins to `CORS_ORIGINS` in docker-compose.staging.yml:
|
||||
|
||||
```yaml
|
||||
todo-backend:
|
||||
environment:
|
||||
# Include both the app's own web AND manacore-web dashboard
|
||||
CORS_ORIGINS: http://46.224.108.214:5188,http://46.224.108.214:5173,http://localhost:5188,http://localhost:5173
|
||||
```
|
||||
|
||||
### Checklist for New Backends
|
||||
|
||||
When deploying a new backend that will be called from manacore-web dashboard:
|
||||
1. Add `http://46.224.108.214:5173` to CORS_ORIGINS
|
||||
2. Add `http://localhost:5173` for local development
|
||||
3. Restart the container after config changes
|
||||
|
||||
### Testing CORS
|
||||
|
||||
```bash
|
||||
curl -I -X OPTIONS http://46.224.108.214:3018/api/v1/endpoint \
|
||||
-H "Origin: http://46.224.108.214:5173" \
|
||||
-H "Access-Control-Request-Method: GET"
|
||||
|
||||
# Should see:
|
||||
# Access-Control-Allow-Origin: http://46.224.108.214:5173
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 3. CD Workflow Version Tags
|
||||
|
||||
### Problem
|
||||
|
||||
docker-compose uses variables like `${TODO_WEB_VERSION:-latest}`, but the CD workflow wasn't updating the `.env` file on the staging server, causing containers to always use `latest` instead of the tagged version.
|
||||
|
||||
**Symptoms:**
|
||||
- Deployed new version but container still running old code
|
||||
- `docker ps` shows wrong image tag
|
||||
|
||||
### Solution
|
||||
|
||||
The CD workflow (`.github/workflows/cd-staging-tagged.yml`) now:
|
||||
1. Computes the version variable name (e.g., `TODO_WEB_VERSION`)
|
||||
2. Updates the `.env` file on staging server
|
||||
3. docker-compose reads from `.env`
|
||||
|
||||
### Tag Naming Convention
|
||||
|
||||
Tags must follow the exact project name as defined in the CD workflow:
|
||||
|
||||
| Project | Correct Tag Format | Wrong Format |
|
||||
|---------|-------------------|--------------|
|
||||
| mana-core-auth | `mana-core-auth-staging-v1.0.0` | `auth-staging-v1.0.0` |
|
||||
| chat | `chat-staging-v1.0.0` or `chat-all-staging-v1.0.0` | - |
|
||||
| todo | `todo-staging-v1.0.0` or `todo-all-staging-v1.0.0` | - |
|
||||
|
||||
**Note**: Using the wrong tag format (e.g., `auth-staging-*` instead of `mana-core-auth-staging-*`) will cause the workflow to fail because it won't find the correct Dockerfile path.
|
||||
|
||||
### Verifying Deployment
|
||||
|
||||
```bash
|
||||
# Check running container version
|
||||
docker ps --format '{{.Names}}: {{.Image}}' | grep todo
|
||||
|
||||
# Check .env file
|
||||
cat ~/manacore-staging/.env | grep VERSION
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 4. Database Setup
|
||||
|
||||
### Problem
|
||||
|
||||
New backends fail with `database "X" does not exist` because the PostgreSQL databases weren't created.
|
||||
|
||||
**Symptoms:**
|
||||
- 500 Internal Server Error
|
||||
- Logs show: `PostgresError: database "todo" does not exist`
|
||||
|
||||
### Solution
|
||||
|
||||
Create databases manually on first deployment:
|
||||
|
||||
```bash
|
||||
# SSH to staging
|
||||
ssh deploy@46.224.108.214
|
||||
|
||||
# Create databases
|
||||
docker exec manacore-postgres-staging psql -U postgres -c 'CREATE DATABASE todo;'
|
||||
docker exec manacore-postgres-staging psql -U postgres -c 'CREATE DATABASE calendar;'
|
||||
docker exec manacore-postgres-staging psql -U postgres -c 'CREATE DATABASE clock;'
|
||||
|
||||
# Restart backends (they auto-migrate schemas on startup)
|
||||
cd ~/manacore-staging
|
||||
docker compose restart todo-backend calendar-backend clock-backend
|
||||
```
|
||||
|
||||
### Checklist for New Apps
|
||||
|
||||
When deploying a new app with a database:
|
||||
1. Create the database: `CREATE DATABASE appname;`
|
||||
2. The backend will auto-migrate the schema on startup
|
||||
3. Verify tables exist: `\dt` in psql
|
||||
|
||||
---
|
||||
|
||||
## 5. User ID Format (Better Auth)
|
||||
|
||||
### Problem
|
||||
|
||||
Backend database schemas use `uuid` type for `user_id`, but Better Auth generates non-UUID user IDs like `otUe1YrfENPdHnrF3g1vSBfpkQfambCZ`.
|
||||
|
||||
**Symptoms:**
|
||||
- 500 Internal Server Error on authenticated requests
|
||||
- Logs show: `invalid input syntax for type uuid: "otUe1YrfENPdHnrF3g1vSBfpkQfambCZ"`
|
||||
|
||||
### Solution
|
||||
|
||||
Change `user_id` columns from `uuid` to `text`:
|
||||
|
||||
```sql
|
||||
-- For each table with user_id (use USING clause for explicit conversion)
|
||||
ALTER TABLE tasks ALTER COLUMN user_id TYPE text USING user_id::text;
|
||||
ALTER TABLE projects ALTER COLUMN user_id TYPE text USING user_id::text;
|
||||
-- etc.
|
||||
```
|
||||
|
||||
**Important**: Always use the `USING` clause when converting column types. Without it, PostgreSQL may silently fail or produce unexpected results:
|
||||
|
||||
```sql
|
||||
-- CORRECT - Explicit conversion
|
||||
ALTER TABLE events ALTER COLUMN user_id TYPE text USING user_id::text;
|
||||
|
||||
-- RISKY - Errors out ("cannot be cast automatically") when no automatic cast exists
ALTER TABLE events ALTER COLUMN user_id TYPE text;
|
||||
```
|
||||
|
||||
### Prevention
|
||||
|
||||
When creating new backend schemas, **always use `text` type for user_id**:
|
||||
|
||||
```typescript
|
||||
// Drizzle schema - CORRECT
|
||||
export const tasks = pgTable('tasks', {
|
||||
id: uuid('id').defaultRandom().primaryKey(),
|
||||
userId: text('user_id').notNull(), // Use text, not uuid
|
||||
// ...
|
||||
});
|
||||
|
||||
// WRONG - Don't do this
|
||||
export const tasks = pgTable('tasks', {
|
||||
userId: uuid('user_id').notNull(), // Will fail with Better Auth
|
||||
});
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Quick Debugging Commands
|
||||
|
||||
```bash
|
||||
# Check container logs
|
||||
docker logs <container-name> --tail 50
|
||||
|
||||
# Check container is running correct version
|
||||
docker ps --format '{{.Names}}: {{.Image}}'
|
||||
|
||||
# Test CORS
|
||||
curl -I -X OPTIONS <url> -H "Origin: <origin>"
|
||||
|
||||
# Check database exists
|
||||
docker exec manacore-postgres-staging psql -U postgres -c '\l'
|
||||
|
||||
# Check tables in database
|
||||
docker exec manacore-postgres-staging psql -U postgres -d <dbname> -c '\dt'
|
||||
|
||||
# Restart a service
|
||||
cd ~/manacore-staging && docker compose restart <service-name>
|
||||
|
||||
# Force recreate with new config
|
||||
cd ~/manacore-staging && docker compose up -d --no-deps --force-recreate <service-name>
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Port Reference
|
||||
|
||||
| Service | Port |
|
||||
|---------|------|
|
||||
| mana-core-auth | 3001 |
|
||||
| chat-backend | 3002 |
|
||||
| calendar-backend | 3016 |
|
||||
| clock-backend | 3017 |
|
||||
| todo-backend | 3018 |
|
||||
| chat-web | 3000 |
|
||||
| manacore-web | 5173 |
|
||||
| calendar-web | 5186 |
|
||||
| clock-web | 5187 |
|
||||
| todo-web | 5188 |
|
||||
|
||||
---
|
||||
|
||||
## 6. Debugging Checklist
|
||||
|
||||
When something doesn't work on staging, follow this checklist:
|
||||
|
||||
### API Returns Wrong Data or Fails
|
||||
|
||||
1. **Check if calling correct URL**
|
||||
```bash
|
||||
# In browser console
|
||||
console.log(window.__PUBLIC_BACKEND_URL__)
|
||||
```
|
||||
If undefined or localhost, the runtime env injection isn't working.
|
||||
|
||||
2. **Check CORS**
|
||||
```bash
|
||||
curl -I -X OPTIONS http://46.224.108.214:<port>/api/v1/endpoint \
|
||||
-H "Origin: http://46.224.108.214:5173"
|
||||
```
|
||||
Should return `Access-Control-Allow-Origin` header.
|
||||
|
||||
3. **Check container logs**
|
||||
```bash
|
||||
ssh deploy@46.224.108.214 "docker logs <container-name> --tail 100"
|
||||
```
|
||||
|
||||
### 500 Internal Server Error
|
||||
|
||||
1. **Check database exists**
|
||||
```bash
|
||||
docker exec manacore-postgres-staging psql -U postgres -c '\l'
|
||||
```
|
||||
|
||||
2. **Check tables exist**
|
||||
```bash
|
||||
docker exec manacore-postgres-staging psql -U postgres -d <dbname> -c '\dt'
|
||||
```
|
||||
|
||||
3. **Check for type mismatches** (especially user_id uuid vs text)
|
||||
|
||||
### 401 Unauthorized
|
||||
|
||||
1. **Check token is being sent**
|
||||
```bash
|
||||
# In browser Network tab, check Authorization header
|
||||
```
|
||||
|
||||
2. **Check JWKS endpoint**
|
||||
```bash
|
||||
curl http://46.224.108.214:3001/api/v1/auth/jwks
|
||||
```
|
||||
|
||||
3. **Check issuer/audience match** - Token must have `iss: manacore` and `aud: manacore`
|
||||
|
||||
### Container Not Updated
|
||||
|
||||
1. **Check image version**
|
||||
```bash
|
||||
docker ps --format '{{.Names}}: {{.Image}}'
|
||||
```
|
||||
|
||||
2. **Check .env file**
|
||||
```bash
|
||||
cat ~/manacore-staging/.env | grep VERSION
|
||||
```
|
||||
|
||||
3. **Force recreate**
|
||||
```bash
|
||||
docker compose up -d --no-deps --force-recreate <service-name>
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Summary: Common Mistakes to Avoid
|
||||
|
||||
| Mistake | Consequence | Prevention |
|
||||
|---------|-------------|------------|
|
||||
| Using `import.meta.env` for Docker runtime | URLs baked at build time | Use `window.__PUBLIC_*__` with runtime injection |
|
||||
| Initializing API clients at module level | Client uses stale URLs | Use lazy initialization pattern |
|
||||
| Using `uuid` type for user_id | Better Auth IDs fail validation | Always use `text` type for user_id |
|
||||
| Missing CORS origin for manacore-web | Dashboard can't call backends | Add port 5173 to all backend CORS configs |
|
||||
| Wrong tag format for mana-core-auth | Deployment fails, can't find Dockerfile | Use `mana-core-auth-staging-v*` not `auth-staging-v*` |
|
||||
| Forgetting to create database | Backend crashes on startup | Create database before first deployment |
|
||||
| ALTER TABLE without USING clause | Silent failures on type conversion | Always use `USING column::new_type` |
|
||||
|
|
---
|
# Staging Environment Setup Guide
|
||||
|
||||
This document describes the complete staging environment setup for ManaCore apps on Hetzner VPS with HTTPS via Caddy reverse proxy.
|
||||
|
||||
## Overview
|
||||
|
||||
| Component | Details |
|
||||
|-----------|---------|
|
||||
| **Server** | Hetzner VPS (46.224.108.214) |
|
||||
| **Domain** | manacore.ai (Namecheap) |
|
||||
| **Reverse Proxy** | Caddy (auto-SSL via Let's Encrypt) |
|
||||
| **Container Runtime** | Docker Compose |
|
||||
| **SSH Access** | `ssh -i ~/.ssh/hetzner_deploy_key deploy@46.224.108.214` |
|
||||
|
||||
## Architecture
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────┐
|
||||
│ Hetzner VPS (46.224.108.214) │
|
||||
│ │
|
||||
Internet │ ┌─────────────────────────────────────┐ │
|
||||
│ │ │ Caddy (ports 80/443) │ │
|
||||
│ │ │ Auto-SSL via Let's Encrypt │ │
|
||||
▼ │ └──────────────┬──────────────────────┘ │
|
||||
┌──────────────┐ │ │ │
|
||||
│ Namecheap │ │ ▼ │
|
||||
│ DNS Records │────────────────────│ ┌─────────────────────────────────────┐ │
|
||||
│ │ │ │ Docker Compose Services │ │
|
||||
│ *.staging │ │ │ │ │
|
||||
│ A → IP │ │ │ mana-core-auth:3001 │ │
|
||||
└──────────────┘ │ │ chat-web:3000 / chat-backend:3002 │ │
|
||||
│ │ clock-web:5187 / clock-backend:3017│ │
|
||||
│ │ calendar-web:5186 / calendar-api:3016│ │
|
||||
│ │ todo-web:5188 / todo-backend:3018 │ │
|
||||
│ │ manacore-web:5173 │ │
|
||||
│ │ postgres:5432 / redis:6379 │ │
|
||||
│ └─────────────────────────────────────┘ │
|
||||
└─────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## Domain Mapping
|
||||
|
||||
### DNS Configuration (Namecheap)
|
||||
|
||||
| Type | Host | Value | TTL |
|
||||
|------|------|-------|-----|
|
||||
| A | `staging` | 46.224.108.214 | Automatic |
|
||||
| A | `*.staging` | 46.224.108.214 | Automatic |
|
||||
|
||||
The wildcard record `*.staging` enables all subdomains like `auth.staging.manacore.ai`, `clock.staging.manacore.ai`, etc.
|
||||
|
||||
### Staging URLs
|
||||
|
||||
| Service | URL | Internal Port |
|
||||
|---------|-----|---------------|
|
||||
| **Auth** | https://auth.staging.manacore.ai | 3001 |
|
||||
| **ManaCore Web** | https://staging.manacore.ai | 5173 |
|
||||
| **Chat Web** | https://chat.staging.manacore.ai | 3000 |
|
||||
| **Chat API** | https://chat-api.staging.manacore.ai | 3002 |
|
||||
| **Clock Web** | https://clock.staging.manacore.ai | 5187 |
|
||||
| **Clock API** | https://clock-api.staging.manacore.ai | 3017 |
|
||||
| **Calendar Web** | https://calendar.staging.manacore.ai | 5186 |
|
||||
| **Calendar API** | https://calendar-api.staging.manacore.ai | 3016 |
|
||||
| **Todo Web** | https://todo.staging.manacore.ai | 5188 |
|
||||
| **Todo API** | https://todo-api.staging.manacore.ai | 3018 |
|
||||
|
||||
## Caddy Reverse Proxy
|
||||
|
||||
### Installation (One-time setup)
|
||||
|
||||
```bash
|
||||
# SSH into server
|
||||
ssh -i ~/.ssh/hetzner_deploy_key deploy@46.224.108.214
|
||||
|
||||
# Create Caddy data directory
|
||||
mkdir -p ~/caddy_data ~/caddy_config
|
||||
|
||||
# Run Caddy container
|
||||
docker run -d \
|
||||
--name caddy \
|
||||
--network host \
|
||||
--restart unless-stopped \
|
||||
-v ~/Caddyfile:/etc/caddy/Caddyfile \
|
||||
-v ~/caddy_data:/data \
|
||||
-v ~/caddy_config:/config \
|
||||
caddy:2-alpine
|
||||
```
|
||||
|
||||
### Configuration
|
||||
|
||||
The Caddyfile is stored at:
|
||||
- **Server**: `~/Caddyfile`
|
||||
- **Repo**: `docker/caddy/Caddyfile.staging`
|
||||
|
||||
```caddyfile
|
||||
# ManaCore Staging Reverse Proxy
|
||||
|
||||
auth.staging.manacore.ai {
|
||||
reverse_proxy localhost:3001
|
||||
}
|
||||
|
||||
chat.staging.manacore.ai {
|
||||
reverse_proxy localhost:3000
|
||||
}
|
||||
|
||||
chat-api.staging.manacore.ai {
|
||||
reverse_proxy localhost:3002
|
||||
}
|
||||
|
||||
staging.manacore.ai {
|
||||
reverse_proxy localhost:5173
|
||||
}
|
||||
|
||||
calendar.staging.manacore.ai {
|
||||
reverse_proxy localhost:5186
|
||||
}
|
||||
|
||||
calendar-api.staging.manacore.ai {
|
||||
reverse_proxy localhost:3016
|
||||
}
|
||||
|
||||
clock.staging.manacore.ai {
|
||||
reverse_proxy localhost:5187
|
||||
}
|
||||
|
||||
clock-api.staging.manacore.ai {
|
||||
reverse_proxy localhost:3017
|
||||
}
|
||||
|
||||
todo.staging.manacore.ai {
|
||||
reverse_proxy localhost:5188
|
||||
}
|
||||
|
||||
todo-api.staging.manacore.ai {
|
||||
reverse_proxy localhost:3018
|
||||
}
|
||||
```
|
||||
|
||||
### Updating Caddy Configuration
|
||||
|
||||
```bash
|
||||
# Copy updated config to server
|
||||
scp -i ~/.ssh/hetzner_deploy_key docker/caddy/Caddyfile.staging deploy@46.224.108.214:~/Caddyfile
|
||||
|
||||
# Reload Caddy (no downtime)
|
||||
ssh -i ~/.ssh/hetzner_deploy_key deploy@46.224.108.214 "docker exec caddy caddy reload --config /etc/caddy/Caddyfile"
|
||||
```
|
||||
|
||||
### Caddy Management Commands
|
||||
|
||||
```bash
|
||||
# View logs
|
||||
docker logs caddy -f
|
||||
|
||||
# Restart Caddy
|
||||
docker restart caddy
|
||||
|
||||
# Check Caddy status
|
||||
docker exec caddy caddy validate --config /etc/caddy/Caddyfile
|
||||
```
|
||||
|
||||
## SvelteKit Runtime Environment Variables
|
||||
|
||||
### The Problem
|
||||
|
||||
SvelteKit's `$env/static/public` variables are replaced at **build time**. When Docker images are built in CI, the environment variables are baked into the JavaScript bundles. This means containers cannot use different URLs for different environments.
|
||||
|
||||
### The Solution
|
||||
|
||||
Use `$env/dynamic/private` in `hooks.server.ts` to read environment variables at **runtime**, then inject them into the HTML for client-side access.
|
||||
|
||||
### Implementation
|
||||
|
||||
Each SvelteKit web app has a `hooks.server.ts` that:
|
||||
1. Reads `_CLIENT` environment variables at runtime
|
||||
2. Injects them into the HTML via `<script>` tag
|
||||
3. Makes them available on `window.__PUBLIC_*__`
|
||||
|
||||
**Example: `apps/clock/apps/web/src/hooks.server.ts`**
|
||||
|
||||
```typescript
|
||||
import type { Handle } from '@sveltejs/kit';
|
||||
import { env } from '$env/dynamic/private';
|
||||
|
||||
export const handle: Handle = async ({ event, resolve }) => {
|
||||
// Read env vars at RUNTIME (not build time)
|
||||
const authUrlClient = env.PUBLIC_MANA_CORE_AUTH_URL_CLIENT || env.PUBLIC_MANA_CORE_AUTH_URL || '';
|
||||
const backendUrlClient = env.PUBLIC_BACKEND_URL_CLIENT || env.PUBLIC_BACKEND_URL || '';
|
||||
|
||||
return resolve(event, {
|
||||
transformPageChunk: ({ html }) => {
|
||||
// Inject into HTML for client-side access
|
||||
const envScript = `<script>
|
||||
window.__PUBLIC_MANA_CORE_AUTH_URL__ = "${authUrlClient}";
|
||||
window.__PUBLIC_BACKEND_URL__ = "${backendUrlClient}";
|
||||
</script>`;
|
||||
return html.replace('<head>', `<head>${envScript}`);
|
||||
},
|
||||
});
|
||||
};
|
||||
```
|
||||
|
||||
### Environment Variable Pattern
|
||||
|
||||
Each web app container receives two sets of URLs:
|
||||
|
||||
| Variable | Purpose | Example |
|
||||
|----------|---------|---------|
|
||||
| `PUBLIC_BACKEND_URL` | Server-side (Docker network) | `http://clock-backend:3017` |
|
||||
| `PUBLIC_BACKEND_URL_CLIENT` | Client-side (browser) | `https://clock-api.staging.manacore.ai` |
|
||||
| `PUBLIC_MANA_CORE_AUTH_URL` | Server-side auth | `http://mana-core-auth:3001` |
|
||||
| `PUBLIC_MANA_CORE_AUTH_URL_CLIENT` | Client-side auth | `https://auth.staging.manacore.ai` |
|
||||
|
||||
## Docker Compose Configuration
|
||||
|
||||
### File Locations
|
||||
|
||||
| File | Purpose |
|
||||
|------|---------|
|
||||
| `docker-compose.staging.yml` | Staging configuration (repo) |
|
||||
| `~/manacore-staging/docker-compose.yml` | Server deployment |
|
||||
|
||||
### Key Configuration Sections
|
||||
|
||||
**Web App Environment Variables:**
|
||||
```yaml
|
||||
clock-web:
|
||||
environment:
|
||||
NODE_ENV: staging
|
||||
PORT: 5187
|
||||
# Server-side URLs (Docker internal network)
|
||||
PUBLIC_BACKEND_URL: http://clock-backend:3017
|
||||
PUBLIC_MANA_CORE_AUTH_URL: http://mana-core-auth:3001
|
||||
# Client-side URLs (browser access via HTTPS)
|
||||
PUBLIC_BACKEND_URL_CLIENT: https://clock-api.staging.manacore.ai
|
||||
PUBLIC_MANA_CORE_AUTH_URL_CLIENT: https://auth.staging.manacore.ai
|
||||
```
|
||||
|
||||
**Backend CORS Configuration:**
|
||||
```yaml
|
||||
clock-backend:
|
||||
environment:
|
||||
CORS_ORIGINS: https://clock.staging.manacore.ai,https://staging.manacore.ai,http://localhost:5187
|
||||
```
|
||||
|
||||
**Auth Service CORS:**
|
||||
```yaml
|
||||
mana-core-auth:
|
||||
environment:
|
||||
CORS_ORIGINS: https://chat.staging.manacore.ai,https://staging.manacore.ai,https://calendar.staging.manacore.ai,https://clock.staging.manacore.ai,https://todo.staging.manacore.ai,http://localhost:3000,http://localhost:5173
|
||||
```
|
||||
|
||||
### Syncing Configuration to Server
|
||||
|
||||
```bash
|
||||
# Copy docker-compose to server
|
||||
scp -i ~/.ssh/hetzner_deploy_key docker-compose.staging.yml deploy@46.224.108.214:~/manacore-staging/docker-compose.yml
|
||||
|
||||
# Recreate containers with new config
|
||||
ssh -i ~/.ssh/hetzner_deploy_key deploy@46.224.108.214 "cd ~/manacore-staging && docker compose up -d --force-recreate"
|
||||
```
|
||||
|
||||
## Deployment Workflow
|
||||
|
||||
### CI/CD Pipeline
|
||||
|
||||
The GitHub Actions workflow (`.github/workflows/cd-staging.yml`):
|
||||
1. Builds Docker images on push to `dev` branch
|
||||
2. Pushes images to GitHub Container Registry (ghcr.io)
|
||||
3. SSHs into staging server
|
||||
4. Pulls latest images
|
||||
5. Restarts containers
|
||||
|
||||
### Manual Deployment
|
||||
|
||||
```bash
|
||||
# 1. Build and push images (from local)
|
||||
docker build -t ghcr.io/memo-2023/clock-web:latest -f apps/clock/apps/web/Dockerfile .
|
||||
docker push ghcr.io/memo-2023/clock-web:latest
|
||||
|
||||
# 2. SSH into server
|
||||
ssh -i ~/.ssh/hetzner_deploy_key deploy@46.224.108.214
|
||||
|
||||
# 3. Pull and restart
|
||||
cd ~/manacore-staging
|
||||
docker compose pull
|
||||
docker compose up -d --force-recreate
|
||||
```
|
||||
|
||||
### Updating Environment Variables
|
||||
|
||||
1. Edit `docker-compose.staging.yml` locally
|
||||
2. Copy to server: `scp -i ~/.ssh/hetzner_deploy_key docker-compose.staging.yml deploy@46.224.108.214:~/manacore-staging/docker-compose.yml`
|
||||
3. Recreate affected containers: `docker compose up -d --force-recreate <service-name>`
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Mixed Content Errors
|
||||
|
||||
**Symptom:** Browser console shows "Mixed Content: The page was loaded over HTTPS, but requested an insecure resource"
|
||||
|
||||
**Cause:** Client-side JavaScript is calling HTTP URLs instead of HTTPS
|
||||
|
||||
**Solution:**
|
||||
1. Check `_CLIENT` environment variables in docker-compose.yml
|
||||
2. Ensure they use `https://` staging domains
|
||||
3. Recreate web containers: `docker compose up -d --force-recreate <web-service>`
|
||||
|
||||
### CORS Errors
|
||||
|
||||
**Symptom:** Browser console shows "Access-Control-Allow-Origin" errors
|
||||
|
||||
**Cause:** Backend CORS_ORIGINS doesn't include the HTTPS staging domain
|
||||
|
||||
**Solution:**
|
||||
1. Add the HTTPS domain to `CORS_ORIGINS` in docker-compose.yml
|
||||
2. Recreate backend containers
|
||||
|
||||
### Caddy SSL Certificate Issues
|
||||
|
||||
**Symptom:** Browser shows SSL certificate warning
|
||||
|
||||
**Solution:**
|
||||
```bash
|
||||
# Check Caddy logs
|
||||
docker logs caddy
|
||||
|
||||
# Force certificate renewal
|
||||
docker exec caddy caddy reload --config /etc/caddy/Caddyfile
|
||||
```
|
||||
|
||||
### Container Health Check Failures
|
||||
|
||||
**Symptom:** Container shows "unhealthy" status
|
||||
|
||||
**Solution:**
|
||||
```bash
|
||||
# Check container logs
|
||||
docker logs <container-name>
|
||||
|
||||
# Check health status
|
||||
docker inspect <container-name> | grep -A 20 Health
|
||||
```
|
||||
|
||||
## Adding a New App to Staging
|
||||
|
||||
### 1. Update DNS (if needed)
|
||||
|
||||
If using a new subdomain pattern, update Namecheap DNS. The `*.staging` wildcard should cover most cases.
|
||||
|
||||
### 2. Update Caddyfile
|
||||
|
||||
Add entries for web and API:
|
||||
```caddyfile
|
||||
newapp.staging.manacore.ai {
|
||||
reverse_proxy localhost:<WEB_PORT>
|
||||
}
|
||||
|
||||
newapp-api.staging.manacore.ai {
|
||||
reverse_proxy localhost:<API_PORT>
|
||||
}
|
||||
```
|
||||
|
||||
### 3. Update docker-compose.staging.yml
|
||||
|
||||
Add the new services with proper environment variables:
|
||||
```yaml
|
||||
newapp-web:
|
||||
image: ghcr.io/memo-2023/newapp-web:latest
|
||||
environment:
|
||||
PUBLIC_BACKEND_URL: http://newapp-backend:<API_PORT>
|
||||
PUBLIC_MANA_CORE_AUTH_URL: http://mana-core-auth:3001
|
||||
PUBLIC_BACKEND_URL_CLIENT: https://newapp-api.staging.manacore.ai
|
||||
PUBLIC_MANA_CORE_AUTH_URL_CLIENT: https://auth.staging.manacore.ai
|
||||
ports:
|
||||
- "<WEB_PORT>:<WEB_PORT>"
|
||||
```
|
||||
|
||||
### 4. Implement hooks.server.ts
|
||||
|
||||
Copy the runtime env var pattern from an existing app:
|
||||
```typescript
|
||||
import type { Handle } from '@sveltejs/kit';
|
||||
import { env } from '$env/dynamic/private';
|
||||
|
||||
export const handle: Handle = async ({ event, resolve }) => {
|
||||
const authUrlClient = env.PUBLIC_MANA_CORE_AUTH_URL_CLIENT || '';
|
||||
const backendUrlClient = env.PUBLIC_BACKEND_URL_CLIENT || '';
|
||||
|
||||
return resolve(event, {
|
||||
transformPageChunk: ({ html }) => {
|
||||
const envScript = `<script>
|
||||
window.__PUBLIC_MANA_CORE_AUTH_URL__ = "${authUrlClient}";
|
||||
window.__PUBLIC_BACKEND_URL__ = "${backendUrlClient}";
|
||||
</script>`;
|
||||
return html.replace('<head>', `<head>${envScript}`);
|
||||
},
|
||||
});
|
||||
};
|
||||
```
|
||||
|
||||
### 5. Deploy
|
||||
|
||||
1. Sync Caddyfile: `scp ... Caddyfile.staging deploy@server:~/Caddyfile`
|
||||
2. Reload Caddy: `docker exec caddy caddy reload --config /etc/caddy/Caddyfile`
|
||||
3. Sync docker-compose: `scp ... docker-compose.staging.yml deploy@server:~/manacore-staging/docker-compose.yml`
|
||||
4. Deploy containers: `docker compose up -d`
|
||||
|
||||
## Quick Reference Commands
|
||||
|
||||
```bash
|
||||
# SSH into server
|
||||
ssh -i ~/.ssh/hetzner_deploy_key deploy@46.224.108.214
|
||||
|
||||
# View all containers
|
||||
docker ps
|
||||
|
||||
# View container logs
|
||||
docker logs -f <container-name>
|
||||
|
||||
# Restart a container
|
||||
docker restart <container-name>
|
||||
|
||||
# Recreate containers with new config
|
||||
cd ~/manacore-staging && docker compose up -d --force-recreate
|
||||
|
||||
# Check Caddy SSL certificates
|
||||
docker exec caddy caddy validate --config /etc/caddy/Caddyfile
|
||||
|
||||
# Test HTTPS endpoint
|
||||
curl -s https://auth.staging.manacore.ai/api/v1/health
|
||||
|
||||
# Check container env vars
|
||||
docker exec <container-name> printenv | grep -E 'CLIENT|CORS'
|
||||
```
|
||||
|
||||
## Related Documentation
|
||||
|
||||
- [Local Development Guide](./LOCAL_DEVELOPMENT.md)
|
||||
- [CI/CD Deployment Guide](./DEPLOYMENT.md)
|
||||
- [Environment Variables](./ENVIRONMENT_VARIABLES.md)
|
||||