refactor: remove local AI services from Mac Mini, GPU-only architecture

- Deactivate Ollama, FLUX.2, and Telegram Bot LaunchAgents on Mac Mini
- Remove extra_hosts from mana-llm (no longer needs host.docker.internal)
- Update health-check.sh to monitor GPU server services instead of local
- Update status.sh to show GPU server status instead of native services
- Rewrite MAC_MINI_SERVER.md: remove ~400 lines of Ollama/FLUX/Bot docs,
  add GPU server architecture diagram and deactivation notes
- Update CAPACITY_PLANNING.md with post-offload numbers (~80-150 peak users)

Mac Mini is now a pure hosting server (Web, API, DB, Sync).
All AI workloads run on GPU server (RTX 3090) via LAN.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Author: Till JS
Date:   2026-03-28 21:23:37 +01:00
Parent: 99f15955fe
Commit: b45ddbbb83
5 changed files with 109 additions and 369 deletions

View file

@ -254,11 +254,17 @@ check_service "Photos Web" "http://localhost:5019/health"
echo ""
echo "Core Services:"
# API Gateway disabled - no GHCR image, no Dockerfile
# Local Docker services on the Mac Mini (loopback probes).
check_service "Search Service" "http://localhost:3020/api/v1/health"
check_service "Media Service" "http://localhost:3015/api/v1/health"
check_service "LLM Service" "http://localhost:3025/health"
echo ""
echo "GPU Server (192.168.178.11):"
# AI workloads were offloaded to the GPU server (RTX 3090) over LAN.
# Third argument is presumably a per-request timeout in seconds — the
# longer budget accounts for the network hop; confirm against the
# check_service definition elsewhere in this script.
check_service "GPU Ollama" "http://192.168.178.11:11434/api/version" 3
check_service "GPU STT" "http://192.168.178.11:3020/health" 3
check_service "GPU TTS" "http://192.168.178.11:3022/health" 3
check_service "GPU Image Gen" "http://192.168.178.11:3023/health" 3
echo ""
echo "Matrix:"
check_service "Synapse" "http://localhost:4000/health"

View file

@ -46,7 +46,6 @@ check_launchd() {
# LaunchAgents that must remain active on the Mac Mini after the
# GPU offload (tunnel, Docker bootstrap, periodic health check).
check_launchd "com.cloudflare.cloudflared" "Cloudflared Tunnel"
check_launchd "com.manacore.docker-startup" "Docker Startup"
check_launchd "com.manacore.health-check" "Health Check (5min)"
check_launchd "com.manacore.stt" "STT Service (Whisper/Voxtral)"
# ============================================
# Docker Status
@ -85,25 +84,25 @@ if docker info >/dev/null 2>&1; then
fi
# ============================================
# Native Services (non-Docker)
# GPU Server (192.168.178.11)
# ============================================
echo ""
echo -e "${BOLD}Native Services:${NC}"
echo -e "${BOLD}GPU Server (192.168.178.11):${NC}"
# NOTE(review): removed-side of this diff — the local Ollama probe is
# deleted by this commit; Ollama now runs on the GPU server instead.
# Ollama
if curl -s --max-time 2 http://localhost:11434/api/tags >/dev/null 2>&1; then
# Count installed models by counting "name" keys in the /api/tags JSON.
OLLAMA_MODELS=$(curl -s http://localhost:11434/api/tags | grep -o '"name":"[^"]*"' | wc -l | tr -d ' ')
echo -e " ${GREEN}[Running]${NC} Ollama (${OLLAMA_MODELS} models)"
else
echo -e " ${YELLOW}[Stopped]${NC} Ollama"
fi
#######################################
# Probe one GPU-server HTTP endpoint and print a colored status line.
# Globals:   GREEN, YELLOW, NC (terminal color codes, defined elsewhere
#            in this script)
# Arguments: $1 - display name of the service
#            $2 - health/version URL to probe
# Outputs:   one "[Running]" or "[Offline]" line to stdout
#######################################
check_gpu_service() {
  local name="$1"
  local url="$2"
  # -f makes curl fail on HTTP >= 400, so an endpoint that answers with
  # an error status is reported Offline instead of Running; --max-time
  # bounds the LAN probe at 3 seconds.
  if curl -sf --max-time 3 "$url" >/dev/null 2>&1; then
    echo -e "  ${GREEN}[Running]${NC} $name"
  else
    echo -e "  ${YELLOW}[Offline]${NC} $name"
  fi
}
# NOTE(review): removed-side of this diff — the local STT probe is
# deleted by this commit; Whisper STT now runs on the GPU server.
# STT Service
if curl -s --max-time 2 http://localhost:3020/health >/dev/null 2>&1; then
echo -e " ${GREEN}[Running]${NC} STT Service (port 3020)"
else
echo -e " ${YELLOW}[Stopped]${NC} STT Service"
fi
# Probe each AI service hosted on the GPU server (RTX 3090) over the LAN.
check_gpu_service "Ollama (LLM)" "http://192.168.178.11:11434/api/version"
check_gpu_service "STT (Whisper)" "http://192.168.178.11:3020/health"
check_gpu_service "TTS" "http://192.168.178.11:3022/health"
check_gpu_service "Image Gen (FLUX)" "http://192.168.178.11:3023/health"
# ============================================
# Network/Tunnel Status