refactor: remove local AI services from Mac Mini, GPU-only architecture

- Deactivate Ollama, FLUX.2, and Telegram Bot LaunchAgents on Mac Mini
- Remove extra_hosts from mana-llm (no longer needs host.docker.internal)
- Update health-check.sh to monitor GPU server services instead of local
- Update status.sh to show GPU server status instead of native services
- Rewrite MAC_MINI_SERVER.md: remove ~400 lines of Ollama/FLUX/Bot docs,
  add GPU server architecture diagram and deactivation notes
- Update CAPACITY_PLANNING.md with post-offload numbers (~80-150 peak users)

Mac Mini is now a pure hosting server (Web, API, DB, Sync).
All AI workloads run on GPU server (RTX 3090) via LAN.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Author: Till JS
Date:   2026-03-28 21:23:37 +01:00
Parent: 99f15955fe
Commit: b45ddbbb83
5 changed files with 109 additions and 369 deletions

View file

@ -254,11 +254,17 @@ check_service "Photos Web" "http://localhost:5019/health"
echo ""
echo "Core Services:"
# API Gateway disabled - no GHCR image, no Dockerfile
# Local Docker services on the Mac Mini (loopback probes).
check_service "Search Service" "http://localhost:3020/api/v1/health"
check_service "Media Service" "http://localhost:3015/api/v1/health"
check_service "LLM Service" "http://localhost:3025/health"
echo ""
echo "GPU Server (192.168.178.11):"
# AI workloads were offloaded to the GPU server (RTX 3090) over LAN.
# Third argument is presumably a per-request timeout in seconds — the
# longer budget accounts for the network hop; confirm against the
# check_service definition elsewhere in this script.
check_service "GPU Ollama" "http://192.168.178.11:11434/api/version" 3
check_service "GPU STT" "http://192.168.178.11:3020/health" 3
check_service "GPU TTS" "http://192.168.178.11:3022/health" 3
check_service "GPU Image Gen" "http://192.168.178.11:3023/health" 3
echo ""
echo "Matrix:"
check_service "Synapse" "http://localhost:4000/health"

View file

@ -46,7 +46,6 @@ check_launchd() {
# LaunchAgents that must remain active on the Mac Mini after the
# GPU offload (tunnel, Docker bootstrap, periodic health check).
check_launchd "com.cloudflare.cloudflared" "Cloudflared Tunnel"
check_launchd "com.manacore.docker-startup" "Docker Startup"
check_launchd "com.manacore.health-check" "Health Check (5min)"
check_launchd "com.manacore.stt" "STT Service (Whisper/Voxtral)"
# ============================================
# Docker Status
@ -85,25 +84,25 @@ if docker info >/dev/null 2>&1; then
fi
# ============================================
# Native Services (non-Docker)
# GPU Server (192.168.178.11)
# ============================================
echo ""
echo -e "${BOLD}Native Services:${NC}"
echo -e "${BOLD}GPU Server (192.168.178.11):${NC}"
# NOTE(review): removed-side of this diff — the local Ollama probe is
# deleted by this commit; Ollama now runs on the GPU server instead.
# Ollama
if curl -s --max-time 2 http://localhost:11434/api/tags >/dev/null 2>&1; then
# Count installed models by counting "name" keys in the /api/tags JSON.
OLLAMA_MODELS=$(curl -s http://localhost:11434/api/tags | grep -o '"name":"[^"]*"' | wc -l | tr -d ' ')
echo -e " ${GREEN}[Running]${NC} Ollama (${OLLAMA_MODELS} models)"
else
echo -e " ${YELLOW}[Stopped]${NC} Ollama"
fi
#######################################
# Probe one GPU-server HTTP endpoint and print a colored status line.
# Globals:   GREEN, YELLOW, NC (terminal color codes, defined elsewhere
#            in this script)
# Arguments: $1 - display name of the service
#            $2 - health/version URL to probe
# Outputs:   one "[Running]" or "[Offline]" line to stdout
#######################################
check_gpu_service() {
  local name="$1"
  local url="$2"
  # -f makes curl fail on HTTP >= 400, so an endpoint that answers with
  # an error status is reported Offline instead of Running; --max-time
  # bounds the LAN probe at 3 seconds.
  if curl -sf --max-time 3 "$url" >/dev/null 2>&1; then
    echo -e "  ${GREEN}[Running]${NC} $name"
  else
    echo -e "  ${YELLOW}[Offline]${NC} $name"
  fi
}
# NOTE(review): removed-side of this diff — the local STT probe is
# deleted by this commit; Whisper STT now runs on the GPU server.
# STT Service
if curl -s --max-time 2 http://localhost:3020/health >/dev/null 2>&1; then
echo -e " ${GREEN}[Running]${NC} STT Service (port 3020)"
else
echo -e " ${YELLOW}[Stopped]${NC} STT Service"
fi
# Probe each AI service hosted on the GPU server (RTX 3090) over the LAN.
check_gpu_service "Ollama (LLM)" "http://192.168.178.11:11434/api/version"
check_gpu_service "STT (Whisper)" "http://192.168.178.11:3020/health"
check_gpu_service "TTS" "http://192.168.178.11:3022/health"
check_gpu_service "Image Gen (FLUX)" "http://192.168.178.11:3023/health"
# ============================================
# Network/Tunnel Status