feat(monitoring): add uptime monitoring via Blackbox Exporter

- scripts/check-status.sh: parallel HTTP check aller mana.how Domains aus cloudflared-config.yml
- docker/blackbox/blackbox.yml: Blackbox Exporter Config (http_2xx, http_health Module)
- docker-compose.macmini.yml: blackbox-exporter Container (Port 9115, 32MB RAM)
- docker/prometheus/prometheus.yml: 4 Scrape-Jobs (blackbox-web, blackbox-api, blackbox-infra, blackbox-gpu)
- docker/prometheus/alerts.yml: 5 Alert-Regeln (WebAppDown, APIDown, InfraToolDown, GPUServiceDown, SlowHTTPResponse)
- docker/grafana/dashboards/uptime.json: Grafana Uptime-Dashboard mit Status-Tables und Verlauf
- package.json: check:status Script

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Till JS 2026-03-31 17:43:25 +02:00
parent bce533ca8b
commit 402baf7c7f
7 changed files with 984 additions and 0 deletions

View file

@ -1710,6 +1710,23 @@ services:
retries: 3
start_period: 20s
# Blackbox Exporter: executes the HTTP probes that Prometheus requests through
# /probe (the blackbox-* scrape jobs address it as blackbox-exporter:9115).
blackbox-exporter:
  image: prom/blackbox-exporter:v0.25.0
  container_name: mana-mon-blackbox
  restart: always
  mem_limit: 32m
  command: ["--config.file=/etc/blackbox/blackbox.yml"]
  volumes:
    # Probe module definitions, mounted read-only from the repository.
    - ./docker/blackbox/blackbox.yml:/etc/blackbox/blackbox.yml:ro
  ports:
    # NOTE(review): this publishes the prober on every host interface. The
    # scrape jobs reach it over the compose network, so binding to
    # 127.0.0.1 may be sufficient; confirm before changing.
    - "9115:9115"
  healthcheck:
    # Query the exporter's dedicated health endpoint rather than the HTML
    # landing page, so the check does not depend on page rendering.
    test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://127.0.0.1:9115/-/healthy"]
    interval: 300s
    timeout: 10s
    retries: 3
    start_period: 10s
# ============================================
# Alerting Stack (Ports 9093-9095)
# ============================================

View file

@ -0,0 +1,24 @@
modules:
  # General-purpose HTTP probe: succeeds on any of the listed 2xx/3xx codes.
  # Redirects are followed, so the status of the final response is evaluated.
  http_2xx:
    prober: http
    timeout: 10s
    http:
      method: GET
      follow_redirects: true
      valid_http_versions:
        - "HTTP/1.1"
        - "HTTP/2.0"
      valid_status_codes:
        - 200
        - 201
        - 204
        - 301
        - 302
        - 303
        - 307
        - 308
      preferred_ip_protocol: "ip4"
      ip_protocol_fallback: true

  # Strict probe for health endpoints: only a direct 200 counts as success,
  # a redirect is treated as a failure because redirects are not followed.
  http_health:
    prober: http
    timeout: 10s
    http:
      method: GET
      follow_redirects: false
      valid_http_versions:
        - "HTTP/1.1"
        - "HTTP/2.0"
      valid_status_codes:
        - 200
      preferred_ip_protocol: "ip4"
      ip_protocol_fallback: true

View file

@ -0,0 +1,649 @@
{
"title": "ManaCore Uptime",
"uid": "uptime",
"description": "HTTP Uptime aller mana.how Dienste via Blackbox Exporter",
"tags": ["uptime", "blackbox", "http"],
"schemaVersion": 38,
"version": 1,
"refresh": "1m",
"time": { "from": "now-24h", "to": "now" },
"timepicker": {},
"timezone": "browser",
"templating": {
"list": [
{
"name": "datasource",
"type": "datasource",
"query": "prometheus",
"label": "Datasource",
"hide": 0,
"current": {}
}
]
},
"panels": [
{
"type": "row",
"id": 1,
"title": "Zusammenfassung",
"collapsed": false,
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 }
},
{
"type": "stat",
"id": 2,
"title": "Web Apps Online",
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"gridPos": { "h": 4, "w": 4, "x": 0, "y": 1 },
"fieldConfig": {
"defaults": {
"color": { "mode": "thresholds" },
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "red", "value": null },
{ "color": "yellow", "value": 10 },
{ "color": "green", "value": 18 }
]
},
"unit": "short",
"mappings": []
},
"overrides": []
},
"options": {
"colorMode": "background",
"graphMode": "area",
"justifyMode": "center",
"orientation": "auto",
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
"textMode": "auto"
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"expr": "sum(probe_success{job=\"blackbox-web\"})",
"refId": "A",
"legendFormat": "Online"
}
]
},
{
"type": "stat",
"id": 3,
"title": "APIs Online",
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"gridPos": { "h": 4, "w": 4, "x": 4, "y": 1 },
"fieldConfig": {
"defaults": {
"color": { "mode": "thresholds" },
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "red", "value": null },
{ "color": "yellow", "value": 7 },
{ "color": "green", "value": 9 }
]
},
"unit": "short",
"mappings": []
},
"overrides": []
},
"options": {
"colorMode": "background",
"graphMode": "area",
"justifyMode": "center",
"orientation": "auto",
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
"textMode": "auto"
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"expr": "sum(probe_success{job=\"blackbox-api\"})",
"refId": "A",
"legendFormat": "Online"
}
]
},
{
"type": "stat",
"id": 4,
"title": "Infra Online",
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"gridPos": { "h": 4, "w": 4, "x": 8, "y": 1 },
"fieldConfig": {
"defaults": {
"color": { "mode": "thresholds" },
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "red", "value": null },
{ "color": "yellow", "value": 4 },
{ "color": "green", "value": 5 }
]
},
"unit": "short",
"mappings": []
},
"overrides": []
},
"options": {
"colorMode": "background",
"graphMode": "area",
"justifyMode": "center",
"orientation": "auto",
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
"textMode": "auto"
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"expr": "sum(probe_success{job=\"blackbox-infra\"})",
"refId": "A",
"legendFormat": "Online"
}
]
},
{
"type": "stat",
"id": 5,
"title": "GPU Services Online",
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"gridPos": { "h": 4, "w": 4, "x": 12, "y": 1 },
"fieldConfig": {
"defaults": {
"color": { "mode": "thresholds" },
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "red", "value": null },
{ "color": "yellow", "value": 2 },
{ "color": "green", "value": 4 }
]
},
"unit": "short",
"mappings": []
},
"overrides": []
},
"options": {
"colorMode": "background",
"graphMode": "area",
"justifyMode": "center",
"orientation": "auto",
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
"textMode": "auto"
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"expr": "sum(probe_success{job=\"blackbox-gpu\"})",
"refId": "A",
"legendFormat": "Online"
}
]
},
{
"type": "stat",
"id": 6,
"title": "Ø Antwortzeit (Web)",
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"gridPos": { "h": 4, "w": 4, "x": 16, "y": 1 },
"fieldConfig": {
"defaults": {
"color": { "mode": "thresholds" },
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "green", "value": null },
{ "color": "yellow", "value": 2 },
{ "color": "red", "value": 5 }
]
},
"unit": "s",
"mappings": []
},
"overrides": []
},
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "center",
"orientation": "auto",
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
"textMode": "auto"
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"expr": "avg(probe_duration_seconds{job=\"blackbox-web\"})",
"refId": "A",
"legendFormat": "Ø Antwortzeit"
}
]
},
{
"type": "row",
"id": 10,
"title": "Web Apps",
"collapsed": false,
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 5 }
},
{
"type": "table",
"id": 11,
"title": "Web App Status",
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"gridPos": { "h": 14, "w": 12, "x": 0, "y": 6 },
"fieldConfig": {
"defaults": {
"custom": {
"align": "left",
"displayMode": "color-background"
},
"mappings": [
{
"type": "value",
"options": {
"0": { "text": "DOWN", "color": "red" },
"1": { "text": "UP", "color": "green" }
}
}
],
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "red", "value": null },
{ "color": "green", "value": 1 }
]
}
},
"overrides": [
{
"matcher": { "id": "byName", "options": "instance" },
"properties": [
{ "id": "displayName", "value": "URL" },
{ "id": "custom.width", "value": 260 }
]
},
{
"matcher": { "id": "byName", "options": "Value" },
"properties": [
{ "id": "displayName", "value": "Status" },
{ "id": "custom.width", "value": 80 }
]
}
]
},
"options": {
"sortBy": [{ "displayName": "Status", "desc": false }],
"footer": { "show": false }
},
"transformations": [
{ "id": "labelsToFields", "options": { "mode": "columns", "keepLabels": ["instance"] } },
{
"id": "organize",
"options": {
"excludeByName": { "Time": true, "job": true, "__name__": true },
"indexByName": { "instance": 0, "Value": 1 }
}
}
],
"targets": [
{
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"expr": "probe_success{job=\"blackbox-web\"}",
"instant": true,
"refId": "A",
"legendFormat": "{{instance}}"
}
]
},
{
"type": "timeseries",
"id": 12,
"title": "Web App Antwortzeiten",
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"gridPos": { "h": 14, "w": 12, "x": 12, "y": 6 },
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": { "lineWidth": 1, "fillOpacity": 10 },
"unit": "s",
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "green", "value": null },
{ "color": "red", "value": 5 }
]
}
},
"overrides": []
},
"options": {
"legend": {
"displayMode": "table",
"placement": "bottom",
"calcs": ["lastNotNull", "mean"]
},
"tooltip": { "mode": "multi" }
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"expr": "probe_duration_seconds{job=\"blackbox-web\"}",
"refId": "A",
"legendFormat": "{{instance}}"
}
]
},
{
"type": "row",
"id": 20,
"title": "API Health Endpoints",
"collapsed": false,
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 20 }
},
{
"type": "table",
"id": 21,
"title": "API Status",
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"gridPos": { "h": 10, "w": 12, "x": 0, "y": 21 },
"fieldConfig": {
"defaults": {
"custom": { "align": "left", "displayMode": "color-background" },
"mappings": [
{
"type": "value",
"options": {
"0": { "text": "DOWN", "color": "red" },
"1": { "text": "UP", "color": "green" }
}
}
],
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "red", "value": null },
{ "color": "green", "value": 1 }
]
}
},
"overrides": [
{
"matcher": { "id": "byName", "options": "instance" },
"properties": [
{ "id": "displayName", "value": "Endpoint" },
{ "id": "custom.width", "value": 300 }
]
},
{
"matcher": { "id": "byName", "options": "Value" },
"properties": [
{ "id": "displayName", "value": "Status" },
{ "id": "custom.width", "value": 80 }
]
}
]
},
"options": {
"sortBy": [{ "displayName": "Status", "desc": false }],
"footer": { "show": false }
},
"transformations": [
{ "id": "labelsToFields", "options": { "mode": "columns", "keepLabels": ["instance"] } },
{
"id": "organize",
"options": {
"excludeByName": { "Time": true, "job": true },
"indexByName": { "instance": 0, "Value": 1 }
}
}
],
"targets": [
{
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"expr": "probe_success{job=\"blackbox-api\"}",
"instant": true,
"refId": "A",
"legendFormat": "{{instance}}"
}
]
},
{
"type": "timeseries",
"id": 22,
"title": "API Uptime-Verlauf (24h)",
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"gridPos": { "h": 10, "w": 12, "x": 12, "y": 21 },
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": { "lineWidth": 2, "fillOpacity": 20 },
"unit": "short",
"min": 0,
"max": 1,
"mappings": [
{ "type": "value", "options": { "0": { "text": "DOWN" }, "1": { "text": "UP" } } }
],
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "red", "value": null },
{ "color": "green", "value": 1 }
]
}
},
"overrides": []
},
"options": {
"legend": {
"displayMode": "table",
"placement": "bottom",
"calcs": ["lastNotNull", "mean"]
},
"tooltip": { "mode": "multi" }
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"expr": "probe_success{job=\"blackbox-api\"}",
"refId": "A",
"legendFormat": "{{instance}}"
}
]
},
{
"type": "row",
"id": 30,
"title": "Infrastruktur & GPU",
"collapsed": false,
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 31 }
},
{
"type": "table",
"id": 31,
"title": "Infra-Dienste Status",
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"gridPos": { "h": 8, "w": 8, "x": 0, "y": 32 },
"fieldConfig": {
"defaults": {
"custom": { "align": "left", "displayMode": "color-background" },
"mappings": [
{
"type": "value",
"options": {
"0": { "text": "DOWN", "color": "red" },
"1": { "text": "UP", "color": "green" }
}
}
],
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "red", "value": null },
{ "color": "green", "value": 1 }
]
}
},
"overrides": [
{
"matcher": { "id": "byName", "options": "instance" },
"properties": [
{ "id": "displayName", "value": "Dienst" },
{ "id": "custom.width", "value": 220 }
]
},
{
"matcher": { "id": "byName", "options": "Value" },
"properties": [{ "id": "displayName", "value": "Status" }]
}
]
},
"options": {
"sortBy": [{ "displayName": "Status", "desc": false }],
"footer": { "show": false }
},
"transformations": [
{ "id": "labelsToFields", "options": { "mode": "columns", "keepLabels": ["instance"] } },
{
"id": "organize",
"options": {
"excludeByName": { "Time": true, "job": true },
"indexByName": { "instance": 0, "Value": 1 }
}
}
],
"targets": [
{
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"expr": "probe_success{job=\"blackbox-infra\"}",
"instant": true,
"refId": "A",
"legendFormat": "{{instance}}"
}
]
},
{
"type": "table",
"id": 32,
"title": "GPU Server Status",
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"gridPos": { "h": 8, "w": 8, "x": 8, "y": 32 },
"fieldConfig": {
"defaults": {
"custom": { "align": "left", "displayMode": "color-background" },
"mappings": [
{
"type": "value",
"options": {
"0": { "text": "DOWN", "color": "red" },
"1": { "text": "UP", "color": "green" }
}
}
],
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "red", "value": null },
{ "color": "green", "value": 1 }
]
}
},
"overrides": [
{
"matcher": { "id": "byName", "options": "instance" },
"properties": [
{ "id": "displayName", "value": "Dienst" },
{ "id": "custom.width", "value": 220 }
]
},
{
"matcher": { "id": "byName", "options": "Value" },
"properties": [{ "id": "displayName", "value": "Status" }]
}
]
},
"options": {
"sortBy": [{ "displayName": "Status", "desc": false }],
"footer": { "show": false }
},
"transformations": [
{ "id": "labelsToFields", "options": { "mode": "columns", "keepLabels": ["instance"] } },
{
"id": "organize",
"options": {
"excludeByName": { "Time": true, "job": true },
"indexByName": { "instance": 0, "Value": 1 }
}
}
],
"targets": [
{
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"expr": "probe_success{job=\"blackbox-gpu\"}",
"instant": true,
"refId": "A",
"legendFormat": "{{instance}}"
}
]
},
{
"type": "timeseries",
"id": 33,
"title": "Alle Dienste — Uptime-Verlauf",
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"gridPos": { "h": 8, "w": 8, "x": 16, "y": 32 },
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": { "lineWidth": 2, "fillOpacity": 10 },
"unit": "short",
"min": 0,
"max": 1,
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "red", "value": null },
{ "color": "green", "value": 1 }
]
}
},
"overrides": []
},
"options": {
"legend": { "displayMode": "list", "placement": "bottom" },
"tooltip": { "mode": "multi" }
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"expr": "avg(probe_success{job=\"blackbox-web\"})",
"refId": "A",
"legendFormat": "Web Apps"
},
{
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"expr": "avg(probe_success{job=\"blackbox-api\"})",
"refId": "B",
"legendFormat": "APIs"
},
{
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"expr": "avg(probe_success{job=\"blackbox-infra\"})",
"refId": "C",
"legendFormat": "Infra"
},
{
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"expr": "avg(probe_success{job=\"blackbox-gpu\"})",
"refId": "D",
"legendFormat": "GPU"
}
]
}
]
}

View file

@ -367,6 +367,58 @@ groups:
summary: "OIDC token endpoint errors"
description: "OIDC token endpoint is returning 5xx errors. SSO may be affected."
# Uptime alerts derived from the Blackbox Exporter probe metrics.
# Each rule fires per probed URL (carried in the `instance` label).
- name: uptime_alerts
  rules:
    # A web frontend has failed its HTTP probe for 2 minutes.
    - alert: WebAppDown
      expr: probe_success{job="blackbox-web"} == 0
      for: 2m
      labels:
        severity: critical
      annotations:
        summary: "Web App offline: {{ $labels.instance }}"
        description: "{{ $labels.instance }} hat seit 2 Minuten keine gültige HTTP-Antwort zurückgegeben."

    # An API health endpoint has failed its probe for 1 minute.
    - alert: APIDown
      expr: probe_success{job="blackbox-api"} == 0
      for: 1m
      labels:
        severity: critical
      annotations:
        summary: "API offline: {{ $labels.instance }}"
        description: "{{ $labels.instance }} antwortet nicht auf den Health-Endpoint."

    # An infrastructure tool (Grafana, Git, ...) has been unreachable for 3 minutes.
    - alert: InfraToolDown
      expr: probe_success{job="blackbox-infra"} == 0
      for: 3m
      labels:
        severity: warning
      annotations:
        summary: "Infra-Dienst offline: {{ $labels.instance }}"
        description: "{{ $labels.instance }} ist seit 3 Minuten nicht erreichbar."

    # A service on the GPU server has been unreachable for 5 minutes.
    - alert: GPUServiceDown
      expr: probe_success{job="blackbox-gpu"} == 0
      for: 5m
      labels:
        severity: warning
      annotations:
        summary: "GPU-Dienst offline: {{ $labels.instance }}"
        description: "{{ $labels.instance }} (GPU-Server) ist seit 5 Minuten nicht erreichbar."

    # A web or API probe has taken longer than 5 seconds for 5 minutes.
    - alert: SlowHTTPResponse
      expr: probe_duration_seconds{job=~"blackbox-web|blackbox-api"} > 5
      for: 5m
      labels:
        severity: warning
      annotations:
        summary: "Langsame HTTP-Antwort: {{ $labels.instance }}"
        description: "{{ $labels.instance }} antwortet mit {{ $value | humanizeDuration }} (> 5s)."
- name: llm_alerts
rules:
# mana-llm Down

View file

@ -242,6 +242,111 @@ scrape_configs:
metrics_path: '/metrics'
scrape_interval: 30s
# ============================================
# Blackbox Exporter — HTTP Uptime Probes
# ============================================
# Every job below lists the URLs to probe as its "targets". The identical
# relabel_configs in each job then:
#   1. copy the URL into the `target` query parameter of the /probe request,
#   2. keep the URL as the `instance` label on the resulting series,
#   3. point the actual scrape address at the exporter (blackbox-exporter:9115).

# Web apps (SvelteKit frontends), probed with the http_2xx module.
- job_name: 'blackbox-web'
  metrics_path: /probe
  params:
    module: [http_2xx]
  static_configs:
    - targets:
        - https://mana.how
        - https://chat.mana.how
        - https://todo.mana.how
        - https://calendar.mana.how
        - https://contacts.mana.how
        - https://clock.mana.how
        - https://photos.mana.how
        - https://picture.mana.how
        - https://storage.mana.how
        - https://presi.mana.how
        - https://nutriphi.mana.how
        - https://planta.mana.how
        - https://calc.mana.how
        - https://zitare.mana.how
        - https://manadeck.mana.how
        - https://skilltree.mana.how
        - https://mukke.mana.how
        - https://citycorners.mana.how
        - https://playground.mana.how
        - https://whopxl.mana.how
        - https://arcade.mana.how
  relabel_configs:
    - source_labels: [__address__]
      target_label: __param_target
    - source_labels: [__param_target]
      target_label: instance
    - target_label: __address__
      replacement: blackbox-exporter:9115

# API health endpoints, probed with the stricter http_health module.
- job_name: 'blackbox-api'
  metrics_path: /probe
  params:
    module: [http_health]
  static_configs:
    - targets:
        - https://auth.mana.how/health
        - https://api.mana.how/health
        - https://chat-api.mana.how/health
        - https://todo-api.mana.how/health
        - https://calendar-api.mana.how/health
        - https://contacts-api.mana.how/health
        - https://storage-api.mana.how/health
        - https://nutriphi-api.mana.how/health
        - https://planta-api.mana.how/health
        - https://picture-api.mana.how/health
  relabel_configs:
    - source_labels: [__address__]
      target_label: __param_target
    - source_labels: [__param_target]
      target_label: instance
    - target_label: __address__
      replacement: blackbox-exporter:9115

# Infrastructure and monitoring tools.
- job_name: 'blackbox-infra'
  metrics_path: /probe
  params:
    module: [http_2xx]
  static_configs:
    - targets:
        - https://git.mana.how
        - https://grafana.mana.how
        - https://stats.mana.how
        - https://glitchtip.mana.how
        - https://matrix.mana.how
        - https://element.mana.how
  relabel_configs:
    - source_labels: [__address__]
      target_label: __param_target
    - source_labels: [__param_target]
      target_label: instance
    - target_label: __address__
      replacement: blackbox-exporter:9115

# Services hosted on the GPU server.
- job_name: 'blackbox-gpu'
  metrics_path: /probe
  params:
    module: [http_2xx]
  static_configs:
    - targets:
        - https://gpu-ollama.mana.how
        - https://gpu-stt.mana.how
        - https://gpu-tts.mana.how
        - https://gpu-img.mana.how
  relabel_configs:
    - source_labels: [__address__]
      target_label: __param_target
    - source_labels: [__param_target]
      target_label: instance
    - target_label: __address__
      replacement: blackbox-exporter:9115
# ============================================
# Pushgateway (deploy metrics, batch jobs)
# ============================================

View file

@ -16,6 +16,7 @@
"clean": "turbo run clean",
"format": "prettier --config .prettierrc.json --write \"**/*.{ts,tsx,js,jsx,json,md,svelte,astro}\"",
"format:check": "prettier --config .prettierrc.json --check \"**/*.{ts,tsx,js,jsx,json,md,svelte,astro}\"",
"check:status": "bash scripts/check-status.sh",
"validate:dockerfiles": "node scripts/validate-dockerfiles.mjs",
"audit:deps": "node scripts/audit-workspace-deps.mjs",
"generate:dockerfiles": "node scripts/generate-dockerfiles.mjs",

136
scripts/check-status.sh Executable file
View file

@ -0,0 +1,136 @@
#!/usr/bin/env bash
# check-status.sh — checks the reachability of all mana.how services.
#
# Hostnames are read directly from cloudflared-config.yml (single source of
# truth) and each one is requested over HTTPS in parallel.
#
# Requires: bash 3+, curl, grep, awk, sort
# Usage: ./scripts/check-status.sh [--internal]
#   --internal   intended to probe internal ports instead of external domains;
#                not implemented yet, a warning is printed when it is passed
set -euo pipefail

# Resolve paths relative to this script so it can be run from any directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
CLOUDFLARED_CONFIG="$REPO_ROOT/cloudflared-config.yml"

# Per-request curl timeout in seconds.
TIMEOUT=8

INTERNAL=false
if [[ "${1:-}" == "--internal" ]]; then
  INTERNAL=true
  # Nothing in this script reads INTERNAL, so the flag used to be a silent
  # no-op. Tell the user that the external domains are checked regardless.
  echo "Warnung: --internal ist noch nicht implementiert, es werden die externen Domains geprüft." >&2
fi

# ANSI colors
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
GRAY='\033[0;90m'
BOLD='\033[1m'
NC='\033[0m'

# Result counters
ok=0; warn=0; fail=0; total=0

# Scratch directory for the per-host result files written by the parallel
# checks; removed on every exit path.
tmpdir=$(mktemp -d)
trap 'rm -rf "$tmpdir"' EXIT
#######################################
# Request one URL and record the outcome for later aggregation.
# Globals:   TIMEOUT (read) - passed to curl --max-time, in seconds
# Arguments: $1 - URL to request
#            $2 - label shown in the report
#            $3 - file that receives one line: icon|code|label|url
# Outputs:   nothing on stdout; the result line is written to $3
# Returns:   0 unless the result file cannot be written; a failed request is
#            reported through the icon/code, not through the exit status
#######################################
check_url() {
  local url="$1"
  local label="$2"
  local outfile="$3"

  # curl prints "000" through -w when it gets no HTTP response, but it also
  # exits non-zero (timeout, DNS failure, refused connection). Under `set -e`
  # that status would abort the background job before the result file is
  # written and the host would vanish from the report, so it is discarded here.
  local code
  code=$(curl -o /dev/null -s -w "%{http_code}" --max-time "$TIMEOUT" "$url" 2>/dev/null) || true
  # Covers curl producing no output at all (e.g. the binary is missing).
  [[ -n "$code" ]] || code="000"

  local icon
  if [[ "$code" =~ ^(200|201|204|301|302|303|307|308)$ ]]; then
    icon="✅"
  elif [[ "$code" =~ ^4 ]]; then
    # 4xx = server reachable, wrong path (API root returns 404 — use health endpoint)
    icon="⚠️"
  elif [[ "$code" == "000" ]]; then
    # No HTTP response at all: timeout or name resolution failure.
    icon="⏱"
  else
    # 5xx or unknown
    icon="❌"
  fi

  printf "%s|%s|%s|%s\n" "$icon" "$code" "$label" "$url" > "$outfile"
}
echo ""
echo -e "${BOLD}ManaCore Service Status${NC} $(date '+%Y-%m-%d %H:%M:%S')"
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"

# Stop with a clear message when the config is missing. Otherwise grep fails
# inside the process substitution below and the host list is silently empty.
if [[ ! -r "$CLOUDFLARED_CONFIG" ]]; then
  echo "Fehler: $CLOUDFLARED_CONFIG nicht gefunden oder nicht lesbar." >&2
  exit 1
fi

# Extract every hostname from cloudflared-config.yml (except ssh.*),
# de-duplicated. Bash 3 compatible (no mapfile).
hostnames=()
while IFS= read -r host; do
  hostnames+=("$host")
done < <(
  grep "hostname:" "$CLOUDFLARED_CONFIG" \
    | awk '{print $3}' \
    | grep -v "^ssh\." \
    | sort -u
)

# Expanding an empty array under `set -u` is an "unbound variable" error on
# bash < 4.4, so bail out with a readable message instead.
if [[ ${#hostnames[@]} -eq 0 ]]; then
  echo "Fehler: keine Hostnamen in $CLOUDFLARED_CONFIG gefunden." >&2
  exit 1
fi

# Check all hosts in parallel; each job writes its result to its own file.
i=0
for host in "${hostnames[@]}"; do
  url="https://$host"
  check_url "$url" "$host" "$tmpdir/$i" &
  i=$((i + 1))
done
wait

# Collect the results and group them by outcome.
declare -a results_ok=()
declare -a results_warn=()
declare -a results_fail=()
declare -a results_4xx=()
for f in "$tmpdir"/*; do
  [[ -f "$f" ]] || continue
  IFS='|' read -r icon code label url < "$f"
  total=$((total + 1))
  line=$(printf " %s %-38s %s %s" "$icon" "$label" "$code" "$url")
  if [[ "$icon" == "✅" ]]; then
    results_ok+=("$line")
    ok=$((ok + 1))
  elif [[ "$icon" == "⏱" ]]; then
    results_warn+=("$line")
    warn=$((warn + 1))
  elif [[ "$icon" == "⚠️" ]]; then
    results_4xx+=("$line")
  else
    results_fail+=("$line")
    fail=$((fail + 1))
  fi
done

# Report. Each section is printed only when it has entries, which also keeps
# empty arrays from being expanded under `set -u`.
if [[ ${#results_ok[@]} -gt 0 ]]; then
  echo ""
  echo -e "${GREEN}${BOLD}ONLINE (${#results_ok[@]})${NC}"
  for line in "${results_ok[@]}"; do echo -e "${GREEN}${line}${NC}"; done
fi
if [[ ${#results_4xx[@]} -gt 0 ]]; then
  echo ""
  echo -e "${YELLOW}${BOLD}ERREICHBAR / 4xx — Root-Pfad nicht definiert (${#results_4xx[@]})${NC}"
  for line in "${results_4xx[@]}"; do echo -e "${YELLOW}${line}${NC}"; done
fi
if [[ ${#results_fail[@]} -gt 0 ]]; then
  echo ""
  echo -e "${RED}${BOLD}NICHT ERREICHBAR / 5xx (${#results_fail[@]})${NC}"
  for line in "${results_fail[@]}"; do echo -e "${RED}${line}${NC}"; done
fi
if [[ ${#results_warn[@]} -gt 0 ]]; then
  echo ""
  echo -e "${YELLOW}${BOLD}TIMEOUT / KEIN DNS (${#results_warn[@]})${NC}"
  for line in "${results_warn[@]}"; do echo -e "${YELLOW}${line}${NC}"; done
fi

# Summary. The 4xx group is listed as well so the individual counts add up to
# the total.
echo ""
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
echo -e " ${GREEN}✅ Online: $ok${NC} ${YELLOW}⚠️ 4xx: ${#results_4xx[@]}${NC} ${RED}❌ Down: $fail${NC} ${YELLOW}⏱ Timeout: $warn${NC} (Gesamt: $total)"
echo ""