From 2fe7f842c6bb7f173dacb51a117a25eb1ccac497 Mon Sep 17 00:00:00 2001
From: Till-JS <101404291+Till-JS@users.noreply.github.com>
Date: Thu, 12 Feb 2026 12:51:49 +0100
Subject: [PATCH] =?UTF-8?q?=F0=9F=94=A7=20fix(mac-mini):=20add=20container?=
 =?UTF-8?q?=20recovery=20and=20update=20health=20check=20ports?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Add ensure-containers-running.sh to detect and auto-start stuck containers
- Add LaunchD plist for automatic container health checks every 5 minutes
- Update health-check.sh with correct ports (3031/5011 for todo, etc.)
- Update deploy.sh health checks to match docker-compose.macmini.yml
- Fix container name references (mana-infra-postgres instead of manacore-postgres)

This prevents 502 errors when containers get stuck in "Created" status.

Co-Authored-By: Claude Opus 4.5
---
Review notes (text between the "---" separator and the diffstat is ignored
by git-am and is not part of the commit):

 * The line structure of this copy was rebuilt from the hunk headers and the
   diffstat. Leading indentation, runs of spaces inside strings and the XML
   tags of the plist are reconstructed by convention, so apply with
   `git apply --ignore-whitespace` and compare the plist with the real file.
 * The plist hunk declares 32 added lines but only 31 are present in this
   copy (presumably the closing </plist> line is cut off).
 * ensure-containers-running.sh: the "\n" sequences inside the double-quoted
   send_notification arguments are not expanded by bash, so the notification
   text carries a literal backslash-n.
 * ensure-containers-running.sh: `curl -d "text=${message}"` posts the text
   without URL-encoding; `--data-urlencode` is needed for "&", "+", etc.
 * ensure-containers-running.sh: `echo $STUCK_CONTAINERS | tr '\n' ', '`
   joins the names with spaces (unquoted echo) and only turns the final
   newline into a trailing comma.
 * Both `while read` loops omit `-r`.
 * The plist hard-codes /Users/till/projects/manacore-monorepo.

 scripts/mac-mini/deploy.sh                    |  22 +--
 scripts/mac-mini/ensure-containers-running.sh | 156 ++++++++++++++++++
 scripts/mac-mini/health-check.sh              |  84 +++++++---
 .../com.manacore.ensure-containers.plist      |  32 ++++
 4 files changed, 261 insertions(+), 33 deletions(-)
 create mode 100755 scripts/mac-mini/ensure-containers-running.sh
 create mode 100644 scripts/mac-mini/launchd/com.manacore.ensure-containers.plist

diff --git a/scripts/mac-mini/deploy.sh b/scripts/mac-mini/deploy.sh
index d899b5746..c5ef9e386 100755
--- a/scripts/mac-mini/deploy.sh
+++ b/scripts/mac-mini/deploy.sh
@@ -92,16 +92,18 @@ check_health() {
     fi
 }
 
-check_health "Auth API" "http://localhost:3001/api/v1/health"
-check_health "ManaCore Web" "http://localhost:5173/health"
-check_health "Chat Backend" "http://localhost:3002/api/v1/health"
-check_health "Chat Web" "http://localhost:3000/health"
-check_health "Todo Backend" "http://localhost:3018/api/health"
-check_health "Todo Web" "http://localhost:5188/health"
-check_health "Calendar Backend" "http://localhost:3016/api/v1/health"
-check_health "Calendar Web" "http://localhost:5186/health"
-check_health "Clock Backend" "http://localhost:3017/api/v1/health"
-check_health "Clock Web" "http://localhost:5187/health"
+check_health "Auth API" "http://localhost:3001/health"
+check_health "ManaCore Web" "http://localhost:5000/health"
+check_health "Chat Backend" "http://localhost:3030/health"
+check_health "Chat Web" "http://localhost:5010/health"
+check_health "Todo Backend" "http://localhost:3031/health"
+check_health "Todo Web" "http://localhost:5011/health"
+check_health "Calendar Backend" "http://localhost:3032/health"
+check_health "Calendar Web" "http://localhost:5012/health"
+check_health "Clock Backend" "http://localhost:3033/health"
+check_health "Clock Web" "http://localhost:5013/health"
+check_health "Contacts Backend" "http://localhost:3034/health"
+check_health "Contacts Web" "http://localhost:5014/health"
 
 echo ""
 echo "=== Deployment Complete ==="
diff --git a/scripts/mac-mini/ensure-containers-running.sh b/scripts/mac-mini/ensure-containers-running.sh
new file mode 100755
index 000000000..aba49db94
--- /dev/null
+++ b/scripts/mac-mini/ensure-containers-running.sh
@@ -0,0 +1,156 @@
+#!/bin/bash
+# ManaCore Container Health Enforcer
+# Ensures all containers are actually running, not just created
+#
+# This script detects containers that are stuck in "Created" or "Exited"
+# status and automatically starts them.
+#
+# Run via LaunchD every 5 minutes or after system startup.
+
+set -e
+
+# Ensure PATH includes docker
+export PATH="/usr/local/bin:/opt/homebrew/bin:$PATH"
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
+COMPOSE_FILE="$PROJECT_ROOT/docker-compose.macmini.yml"
+ENV_FILE="$PROJECT_ROOT/.env.macmini"
+LOG_FILE="/tmp/manacore-container-health.log"
+
+# Load notification config if exists
+if [ -f "$PROJECT_ROOT/.env.notifications" ]; then
+    source "$PROJECT_ROOT/.env.notifications"
+fi
+
+log() {
+    echo "[$(date '+%Y-%m-%d %H:%M:%S')] $1" | tee -a "$LOG_FILE"
+}
+
+send_notification() {
+    local message="$1"
+
+    # Telegram
+    if [ -n "$TELEGRAM_BOT_TOKEN" ] && [ -n "$TELEGRAM_CHAT_ID" ]; then
+        curl -s -X POST "https://api.telegram.org/bot${TELEGRAM_BOT_TOKEN}/sendMessage" \
+            -d "chat_id=${TELEGRAM_CHAT_ID}" \
+            -d "text=${message}" \
+            -d "parse_mode=HTML" \
+            >/dev/null 2>&1 || true
+    fi
+
+    # ntfy
+    if [ -n "$NTFY_TOPIC" ]; then
+        curl -s -d "$message" \
+            -H "Title: ManaCore Container Fixed" \
+            -H "Priority: default" \
+            -H "Tags: white_check_mark" \
+            "https://ntfy.sh/$NTFY_TOPIC" >/dev/null 2>&1 || true
+    fi
+}
+
+# Check if docker is running
+if ! docker info >/dev/null 2>&1; then
+    log "ERROR: Docker is not running"
+    exit 1
+fi
+
+# Get containers that are NOT running (Created, Exited, etc.)
+# Filter only mana-* containers from our compose file
+STUCK_CONTAINERS=$(docker ps -a --filter "status=created" --filter "status=exited" --format "{{.Names}}" | grep "^mana-" || true)
+
+if [ -z "$STUCK_CONTAINERS" ]; then
+    log "OK: All containers are running"
+    exit 0
+fi
+
+log "WARNING: Found containers not running:"
+echo "$STUCK_CONTAINERS" | while read container; do
+    STATUS=$(docker inspect "$container" --format '{{.State.Status}}' 2>/dev/null || echo "unknown")
+    log " - $container (status: $STATUS)"
+done
+
+# Start the stuck containers using docker compose
+log "Starting stuck containers via docker compose..."
+
+cd "$PROJECT_ROOT"
+
+# Use docker compose up for the specific services
+# This ensures dependencies are respected
+for container in $STUCK_CONTAINERS; do
+    # Extract service name from container name (remove mana-app- or mana-* prefix)
+    # Container naming: mana-{category}-{service} or mana-app-{service}-{type}
+    SERVICE_NAME=""
+
+    case "$container" in
+        mana-app-todo-web) SERVICE_NAME="todo-web" ;;
+        mana-app-todo-backend) SERVICE_NAME="todo-backend" ;;
+        mana-app-chat-web) SERVICE_NAME="chat-web" ;;
+        mana-app-chat-backend) SERVICE_NAME="chat-backend" ;;
+        mana-app-calendar-web) SERVICE_NAME="calendar-web" ;;
+        mana-app-calendar-backend) SERVICE_NAME="calendar-backend" ;;
+        mana-app-clock-web) SERVICE_NAME="clock-web" ;;
+        mana-app-clock-backend) SERVICE_NAME="clock-backend" ;;
+        mana-app-contacts-web) SERVICE_NAME="contacts-web" ;;
+        mana-app-contacts-backend) SERVICE_NAME="contacts-backend" ;;
+        mana-app-storage-web) SERVICE_NAME="storage-web" ;;
+        mana-app-storage-backend) SERVICE_NAME="storage-backend" ;;
+        mana-app-presi-web) SERVICE_NAME="presi-web" ;;
+        mana-app-presi-backend) SERVICE_NAME="presi-backend" ;;
+        mana-app-nutriphi-web) SERVICE_NAME="nutriphi-web" ;;
+        mana-app-nutriphi-backend) SERVICE_NAME="nutriphi-backend" ;;
+        mana-app-skilltree-web) SERVICE_NAME="skilltree-web" ;;
+        mana-app-skilltree-backend) SERVICE_NAME="skilltree-backend" ;;
+        mana-app-photos-web) SERVICE_NAME="photos-web" ;;
+        mana-app-photos-backend) SERVICE_NAME="photos-backend" ;;
+        mana-app-web) SERVICE_NAME="mana-web" ;;
+        mana-core-auth) SERVICE_NAME="mana-auth" ;;
+        mana-core-gateway) SERVICE_NAME="api-gateway" ;;
+        mana-core-search) SERVICE_NAME="mana-search" ;;
+        mana-core-searxng) SERVICE_NAME="searxng" ;;
+        mana-core-media) SERVICE_NAME="mana-media" ;;
+        mana-infra-postgres) SERVICE_NAME="postgres" ;;
+        mana-infra-redis) SERVICE_NAME="redis" ;;
+        mana-infra-minio) SERVICE_NAME="minio" ;;
+        mana-matrix-synapse) SERVICE_NAME="synapse" ;;
+        mana-matrix-element) SERVICE_NAME="element-web" ;;
+        mana-matrix-web) SERVICE_NAME="matrix-web" ;;
+        mana-matrix-bot-*) SERVICE_NAME="${container#mana-matrix-bot-}"; SERVICE_NAME="matrix-${SERVICE_NAME}-bot" ;;
+        mana-mon-*) SERVICE_NAME="${container#mana-mon-}" ;;
+        mana-auto-*) SERVICE_NAME="${container#mana-auto-}" ;;
+        mana-service-*) SERVICE_NAME="${container#mana-service-}" ;;
+        mana-app-llm-playground) SERVICE_NAME="llm-playground" ;;
+        *)
+            log " Unknown container pattern: $container, trying direct start"
+            docker start "$container" 2>&1 || true
+            continue
+            ;;
+    esac
+
+    if [ -n "$SERVICE_NAME" ]; then
+        log " Starting service: $SERVICE_NAME"
+        docker compose -f "$COMPOSE_FILE" --env-file "$ENV_FILE" up -d "$SERVICE_NAME" 2>&1 || {
+            log " WARNING: Failed to start $SERVICE_NAME via compose, trying direct start"
+            docker start "$container" 2>&1 || true
+        }
+    fi
+done
+
+# Wait for containers to start
+sleep 10
+
+# Verify containers are now running
+STILL_STUCK=$(docker ps -a --filter "status=created" --filter "status=exited" --format "{{.Names}}" | grep "^mana-" || true)
+
+if [ -z "$STILL_STUCK" ]; then
+    FIXED_MSG="Auto-fixed stuck containers: $(echo $STUCK_CONTAINERS | tr '\n' ', ')"
+    log "SUCCESS: $FIXED_MSG"
+    send_notification "🔧 ManaCore Auto-Recovery\n\n$FIXED_MSG"
+else
+    log "ERROR: Some containers still not running:"
+    echo "$STILL_STUCK" | while read container; do
+        log " - $container"
+    done
+    send_notification "⚠️ ManaCore Container Issue\n\nContainers still stuck: $(echo $STILL_STUCK | tr '\n' ', ')"
+    exit 1
+fi
diff --git a/scripts/mac-mini/health-check.sh b/scripts/mac-mini/health-check.sh
index c8f6566ab..d3e71478c 100755
--- a/scripts/mac-mini/health-check.sh
+++ b/scripts/mac-mini/health-check.sh
@@ -174,7 +174,7 @@ echo ""
 echo "Infrastructure:"
 
 # Check postgres via docker
-if docker exec manacore-postgres pg_isready -U postgres >/dev/null 2>&1; then
+if docker exec mana-infra-postgres pg_isready -U postgres >/dev/null 2>&1; then
     echo -e " ${GREEN}[OK]${NC} PostgreSQL"
 else
     echo -e " ${RED}[FAIL]${NC} PostgreSQL"
@@ -182,60 +182,98 @@ else
 fi
 
 # Check redis via docker
-if docker exec manacore-redis redis-cli ping >/dev/null 2>&1; then
+if docker exec mana-infra-redis redis-cli ping >/dev/null 2>&1; then
     echo -e " ${GREEN}[OK]${NC} Redis"
 else
     echo -e " ${RED}[FAIL]${NC} Redis"
     FAILURES+=("Redis")
 fi
 
+# Check for stuck containers (Created/Exited status)
+STUCK_CONTAINERS=$(docker ps -a --filter "status=created" --filter "status=exited" --format "{{.Names}}" | grep "^mana-" || true)
+if [ -n "$STUCK_CONTAINERS" ]; then
+    echo -e " ${RED}[FAIL]${NC} Stuck containers detected:"
+    echo "$STUCK_CONTAINERS" | while read c; do echo " - $c"; done
+    FAILURES+=("Stuck containers: $(echo $STUCK_CONTAINERS | tr '\n' ' ')")
+fi
+
 echo ""
 echo "Auth & Dashboard:"
 check_service "Auth API" "http://localhost:3001/health"
-check_service "Dashboard Web" "http://localhost:5173/health"
+check_service "Dashboard Web" "http://localhost:5000/health"
 
 echo ""
 echo "Chat:"
-check_service "Chat Backend" "http://localhost:3002/health"
-check_service "Chat Web" "http://localhost:3000/health"
+check_service "Chat Backend" "http://localhost:3030/health"
+check_service "Chat Web" "http://localhost:5010/health"
 
 echo ""
 echo "Todo:"
-check_service "Todo Backend" "http://localhost:3018/health"
-check_service "Todo Web" "http://localhost:5188/health"
+check_service "Todo Backend" "http://localhost:3031/health"
+check_service "Todo Web" "http://localhost:5011/health"
 
 echo ""
 echo "Calendar:"
-check_service "Calendar Backend" "http://localhost:3016/health"
-check_service "Calendar Web" "http://localhost:5186/health"
+check_service "Calendar Backend" "http://localhost:3032/health"
+check_service "Calendar Web" "http://localhost:5012/health"
 
 echo ""
 echo "Clock:"
-check_service "Clock Backend" "http://localhost:3017/health"
-check_service "Clock Web" "http://localhost:5187/health"
+check_service "Clock Backend" "http://localhost:3033/health"
+check_service "Clock Web" "http://localhost:5013/health"
 
 echo ""
 echo "Contacts:"
-check_service "Contacts Backend" "http://localhost:3015/health"
-check_service "Contacts Web" "http://localhost:5184/health"
+check_service "Contacts Backend" "http://localhost:3034/health"
+check_service "Contacts Web" "http://localhost:5014/health"
 
 echo ""
 echo "Storage:"
-check_service "Storage Backend" "http://localhost:3019/api/v1/health"
-check_service "Storage Web" "http://localhost:5185/health"
+check_service "Storage Backend" "http://localhost:3035/api/v1/health"
+check_service "Storage Web" "http://localhost:5015/health"
 
 echo ""
 echo "Presi:"
-check_service "Presi Backend" "http://localhost:3008/api/v1/health"
-check_service "Presi Web" "http://localhost:5178/health"
+check_service "Presi Backend" "http://localhost:3036/api/v1/health"
+check_service "Presi Web" "http://localhost:5016/health"
 
 echo ""
-echo "Matrix (DSGVO-konform):"
-check_service "Synapse" "http://localhost:8008/health"
-check_service "Element Web" "http://localhost:8087/"
-check_service "Matrix Ollama Bot" "http://localhost:3311/health"
-check_service "Matrix Stats Bot" "http://localhost:3312/health"
-check_service "Matrix Project Doc Bot" "http://localhost:3313/health"
+echo "NutriPhi:"
+check_service "NutriPhi Backend" "http://localhost:3037/api/v1/health"
+check_service "NutriPhi Web" "http://localhost:5017/health"
+
+echo ""
+echo "SkillTree:"
+check_service "SkillTree Backend" "http://localhost:3038/health"
+check_service "SkillTree Web" "http://localhost:5018/health"
+
+echo ""
+echo "Photos:"
+check_service "Photos Backend" "http://localhost:3039/api/v1/health"
+check_service "Photos Web" "http://localhost:5019/health"
+
+echo ""
+echo "Core Services:"
+check_service "API Gateway" "http://localhost:3010/health"
+check_service "Search Service" "http://localhost:3020/health"
+check_service "Media Service" "http://localhost:3015/api/v1/health"
+check_service "LLM Service" "http://localhost:3025/health"
+
+echo ""
+echo "Matrix:"
+check_service "Synapse" "http://localhost:4000/health"
+check_service "Element Web" "http://localhost:4080/"
+check_service "Matrix Web" "http://localhost:4090/health"
+check_service "Matrix Mana Bot" "http://localhost:4010/health"
+check_service "Matrix Ollama Bot" "http://localhost:4011/health"
+check_service "Matrix Stats Bot" "http://localhost:4012/health"
+check_service "Matrix Project Doc Bot" "http://localhost:4013/health"
+
+echo ""
+echo "Monitoring:"
+check_service "Grafana" "http://localhost:8000/api/health"
+check_service "Umami" "http://localhost:8010/api/heartbeat"
+check_service "VictoriaMetrics" "http://localhost:9090/health"
 
 echo ""
 echo "Cloudflare Tunnel:"
diff --git a/scripts/mac-mini/launchd/com.manacore.ensure-containers.plist b/scripts/mac-mini/launchd/com.manacore.ensure-containers.plist
new file mode 100644
index 000000000..f147783a6
--- /dev/null
+++ b/scripts/mac-mini/launchd/com.manacore.ensure-containers.plist
@@ -0,0 +1,32 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+    <key>Label</key>
+    <string>com.manacore.ensure-containers</string>
+
+    <key>ProgramArguments</key>
+    <array>
+        <string>/bin/bash</string>
+        <string>/Users/till/projects/manacore-monorepo/scripts/mac-mini/ensure-containers-running.sh</string>
+    </array>
+
+    <key>StartInterval</key>
+    <integer>300</integer>
+
+    <key>RunAtLoad</key>
+    <true/>
+
+    <key>StandardOutPath</key>
+    <string>/tmp/manacore-ensure-containers.log</string>
+
+    <key>StandardErrorPath</key>
+    <string>/tmp/manacore-ensure-containers.log</string>
+
+    <key>EnvironmentVariables</key>
+    <dict>
+        <key>PATH</key>
+        <string>/usr/local/bin:/opt/homebrew/bin:/usr/bin:/bin</string>
+    </dict>
+</dict>