feat(monitoring): add uptime monitoring via Blackbox Exporter

- scripts/check-status.sh: paralleler HTTP-Check aller mana.how-Domains aus cloudflared-config.yml
- docker/blackbox/blackbox.yml: Blackbox Exporter Config (http_2xx, http_health Module)
- docker-compose.macmini.yml: blackbox-exporter Container (Port 9115, 32MB RAM)
- docker/prometheus/prometheus.yml: 4 Scrape-Jobs (blackbox-web, blackbox-api, blackbox-infra, blackbox-gpu)
- docker/prometheus/alerts.yml: 5 Alert-Regeln (WebAppDown, APIDown, InfraToolDown, GPUServiceDown, SlowHTTPResponse)
- docker/grafana/dashboards/uptime.json: Grafana Uptime-Dashboard mit Status-Tables und Verlauf
- package.json: check:status Script

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Till JS 2026-03-31 17:43:25 +02:00
parent bce533ca8b
commit 402baf7c7f
7 changed files with 984 additions and 0 deletions

View file

@@ -242,6 +242,111 @@ scrape_configs:
metrics_path: '/metrics'
scrape_interval: 30s
# ============================================
# Blackbox Exporter — HTTP Uptime Probes
# ============================================
# Web Apps (SvelteKit frontends)
# Every URL listed here is probed via the Blackbox Exporter's http_2xx module.
- job_name: blackbox-web
  metrics_path: '/probe'
  params:
    module:
      - http_2xx
  static_configs:
    - targets:
        - https://mana.how
        - https://chat.mana.how
        - https://todo.mana.how
        - https://calendar.mana.how
        - https://contacts.mana.how
        - https://clock.mana.how
        - https://photos.mana.how
        - https://picture.mana.how
        - https://storage.mana.how
        - https://presi.mana.how
        - https://nutriphi.mana.how
        - https://planta.mana.how
        - https://calc.mana.how
        - https://zitare.mana.how
        - https://manadeck.mana.how
        - https://skilltree.mana.how
        - https://mukke.mana.how
        - https://citycorners.mana.how
        - https://playground.mana.how
        - https://whopxl.mana.how
        - https://arcade.mana.how
  relabel_configs:
    # Hand the listed URL to the exporter as the `target` query parameter.
    - source_labels:
        - __address__
      target_label: __param_target
    # Keep the probed URL visible as the `instance` label on the series.
    - source_labels:
        - __param_target
      target_label: instance
    # Point the actual scrape at the Blackbox Exporter, not the probed site.
    - replacement: 'blackbox-exporter:9115'
      target_label: __address__
# API Health Endpoints
# Each /health URL is probed via the Blackbox Exporter's http_health module.
- job_name: blackbox-api
  metrics_path: '/probe'
  params:
    module:
      - http_health
  static_configs:
    - targets:
        - https://auth.mana.how/health
        - https://api.mana.how/health
        - https://chat-api.mana.how/health
        - https://todo-api.mana.how/health
        - https://calendar-api.mana.how/health
        - https://contacts-api.mana.how/health
        - https://storage-api.mana.how/health
        - https://nutriphi-api.mana.how/health
        - https://planta-api.mana.how/health
        - https://picture-api.mana.how/health
  relabel_configs:
    # Hand the listed URL to the exporter as the `target` query parameter.
    - source_labels:
        - __address__
      target_label: __param_target
    # Keep the probed URL visible as the `instance` label on the series.
    - source_labels:
        - __param_target
      target_label: instance
    # Point the actual scrape at the Blackbox Exporter, not the probed API.
    - replacement: 'blackbox-exporter:9115'
      target_label: __address__
# Infrastructure & Monitoring Tools
# Every URL listed here is probed via the Blackbox Exporter's http_2xx module.
- job_name: blackbox-infra
  metrics_path: '/probe'
  params:
    module:
      - http_2xx
  static_configs:
    - targets:
        - https://git.mana.how
        - https://grafana.mana.how
        - https://stats.mana.how
        - https://glitchtip.mana.how
        - https://matrix.mana.how
        - https://element.mana.how
  relabel_configs:
    # Hand the listed URL to the exporter as the `target` query parameter.
    - source_labels:
        - __address__
      target_label: __param_target
    # Keep the probed URL visible as the `instance` label on the series.
    - source_labels:
        - __param_target
      target_label: instance
    # Point the actual scrape at the Blackbox Exporter, not the probed tool.
    - replacement: 'blackbox-exporter:9115'
      target_label: __address__
# GPU Server Services
# Every URL listed here is probed via the Blackbox Exporter's http_2xx module.
- job_name: blackbox-gpu
  metrics_path: '/probe'
  params:
    module:
      - http_2xx
  static_configs:
    - targets:
        - https://gpu-ollama.mana.how
        - https://gpu-stt.mana.how
        - https://gpu-tts.mana.how
        - https://gpu-img.mana.how
  relabel_configs:
    # Hand the listed URL to the exporter as the `target` query parameter.
    - source_labels:
        - __address__
      target_label: __param_target
    # Keep the probed URL visible as the `instance` label on the series.
    - source_labels:
        - __param_target
      target_label: instance
    # Point the actual scrape at the Blackbox Exporter, not the GPU service.
    - replacement: 'blackbox-exporter:9115'
      target_label: __address__
# ============================================
# Pushgateway (deploy metrics, batch jobs)
# ============================================