feat(monitoring): add GlitchTip health check and disk space monitoring

- Add GlitchTip to health-check.sh monitoring endpoints
- Add native disk space checks for / and /Volumes/ManaData with 80%/90% thresholds
- Extend Prometheus disk alerts to include /host_mnt/Volumes/ManaData mountpoint
- Add ManaData disk usage gauge to Grafana system-overview dashboard

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Till JS 2026-03-23 09:33:09 +01:00
parent a12ec68fc2
commit c8de944c8d
3 changed files with 81 additions and 7 deletions

View file

@ -273,6 +273,7 @@ echo ""
# Monitoring stack: probe each service's HTTP health endpoint in turn.
# Each entry is "display name|health URL".
echo "Monitoring:"
for _monitoring_target in \
  "Grafana|http://localhost:8000/api/health" \
  "Umami|http://localhost:8010/api/heartbeat" \
  "GlitchTip|http://localhost:8020/_health/" \
  "VictoriaMetrics|http://localhost:9090/health"; do
  # Split on the first '|': left part is the name, right part is the URL.
  check_service "${_monitoring_target%%|*}" "${_monitoring_target#*|}"
done
unset _monitoring_target
echo ""
@ -281,6 +282,36 @@ check_service "vmalert" "http://localhost:8880/health"
# Remaining alerting components, then a blank line and the heading for the
# disk space section that follows.
check_service "Alertmanager" "http://localhost:9093/-/healthy"
check_service "Alert Notifier" "http://localhost:9095/health"
printf '\n%s\n' "Disk Space:"
#######################################
# Report disk usage for the filesystem containing a directory.
# Globals:
#   RED, YELLOW, GREEN, NC (read)  - colour escape sequences for echo -e
#   FAILURES (appended)            - one entry per warning/critical/unknown
# Arguments:
#   $1 - display name for the volume
#   $2 - directory whose filesystem is inspected
#   $3 - warning threshold in percent (default 80)
#   $4 - critical threshold in percent (default 90)
# Outputs:
#   One status line on stdout: [SKIP], [OK], [WARN] or [CRIT].
# Returns:
#   Always 0; problems are reported through FAILURES.
#######################################
check_disk() {
  local name=$1
  local path=$2
  local warn_pct=${3:-80}
  local crit_pct=${4:-90}
  local usage_pct avail

  if [ ! -d "$path" ]; then
    echo -e " ${YELLOW}[SKIP]${NC} $name ($path not found)"
    return 0
  fi

  # -P forces the POSIX one-line-per-filesystem layout. The capacity column
  # is located by its "NN%" shape rather than by position, because a
  # filesystem name containing spaces shifts the column numbers.
  usage_pct=$(df -P "$path" | awk '
    NR > 1 {
      for (i = 1; i <= NF; i++) {
        if ($i ~ /^[0-9]+%$/) { v = $i; break }
      }
    }
    END { sub(/%/, "", v); print v }') || usage_pct=""

  # Human-readable free space: the field immediately before the capacity
  # column is the "available" column.
  avail=$(df -h "$path" | awk '
    NR > 1 {
      for (i = 2; i <= NF; i++) {
        if ($i ~ /^[0-9]+%$/) { v = $(i - 1); break }
      }
    }
    END { print v }') || avail=""
  avail=${avail:-unknown}

  # If df failed or produced something unexpected, do not fall through to
  # the numeric comparisons (which would error out and report [OK]).
  case "$usage_pct" in
    '' | *[!0-9]*)
      echo -e " ${YELLOW}[WARN]${NC} $name: unable to determine disk usage for $path"
      FAILURES+=("Disk $name: usage unknown")
      return 0
      ;;
  esac

  if [ "$usage_pct" -ge "$crit_pct" ]; then
    echo -e " ${RED}[CRIT]${NC} $name: ${usage_pct}% used ($avail free)"
    FAILURES+=("Disk $name: ${usage_pct}% (critical)")
  elif [ "$usage_pct" -ge "$warn_pct" ]; then
    echo -e " ${YELLOW}[WARN]${NC} $name: ${usage_pct}% used ($avail free)"
    FAILURES+=("Disk $name: ${usage_pct}% (warning)")
  else
    echo -e " ${GREEN}[OK]${NC} $name: ${usage_pct}% used ($avail free)"
  fi
}
# Check the volumes this host depends on using check_disk's default
# thresholds, then emit a blank line and the next section heading.
for _disk_target in "System (/)|/" "ManaData|/Volumes/ManaData"; do
  # Entry format is "display name|path"; split on the last '|'.
  check_disk "${_disk_target%|*}" "${_disk_target##*|}"
done
unset _disk_target
printf '\n%s\n' "Cloudflare Tunnel:"
if pgrep -x "cloudflared" >/dev/null; then