managarten/docker/grafana/dashboards/uptime.json
Till JS 957060ca55 feat(monitoring): add mana-geocoding + Pelias to prod compose, Prometheus, Grafana, and status.mana.how
Production deployment + observability for the self-hosted geocoding stack:

**docker-compose.macmini.yml**
- New mana-geocoding container (port 3018, internal-only — no traefik
  labels, no Cloudflare route). Uses host.docker.internal to reach the
  Pelias API on the host's pelias compose stack. Dockerfile added under
  services/mana-geocoding/ using the same Bun/Hono pattern as mana-events.

**Prometheus**
- New blackbox-internal job probing mana-geocoding:3018/health, the
  Pelias API on host.docker.internal:4000/v1/status, and Elasticsearch
  at host.docker.internal:9200/_cluster/health. Kept separate from
  blackbox-api which is reserved for public HTTPS endpoints.

**status.mana.how (generate-status-page.sh)**
- Include blackbox-internal in the metric query and add an "Interne
  Dienste" section with its own summary card, right between Infrastruktur
  and GPU Dienste. Summary grid goes from 4 to 5 columns with a
  900px breakpoint.
- friendly_name() now handles http:// URLs and rewrites container-name
  hosts like mana-geocoding:3018/health → "Mana Geocoding",
  host.docker.internal:4000 → "Pelias API",
  host.docker.internal:9200 → "Pelias Elasticsearch".

**Grafana uptime dashboard**
- Add an "Internal" series to the "Alle Dienste — Uptime-Verlauf" panel
- New "Interne Dienste Status" table panel showing per-instance up/down
- New "Geocoding Ø Latenz" stat panel for probe_duration_seconds

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-11 16:11:01 +02:00

1344 lines
22 KiB
JSON

{
"title": "Mana Uptime",
"uid": "uptime",
"description": "HTTP Uptime aller mana.how Dienste via Blackbox Exporter",
"tags": ["uptime", "blackbox", "http"],
"schemaVersion": 38,
"version": 1,
"refresh": "1m",
"time": {
"from": "now-24h",
"to": "now"
},
"timepicker": {},
"timezone": "browser",
"templating": {
"list": [
{
"name": "datasource",
"type": "datasource",
"pluginId": "prometheus",
"label": "Datasource",
"hide": 0,
"current": {}
}
]
},
"panels": [
{
"type": "row",
"id": 1,
"title": "Zusammenfassung",
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 0
}
},
{
"type": "stat",
"id": 2,
"title": "Web Apps Online",
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"gridPos": {
"h": 4,
"w": 4,
"x": 0,
"y": 1
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "red",
"value": null
},
{
"color": "yellow",
"value": 10
},
{
"color": "green",
"value": 18
}
]
},
"unit": "short",
"mappings": []
},
"overrides": []
},
"options": {
"colorMode": "background",
"graphMode": "area",
"justifyMode": "center",
"orientation": "auto",
"reduceOptions": {
"calcs": ["lastNotNull"],
"fields": "",
"values": false
},
"textMode": "auto"
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "sum(probe_success{job=\"blackbox-web\"})",
"refId": "A",
"legendFormat": "Online"
}
]
},
{
"type": "stat",
"id": 3,
"title": "APIs Online",
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"gridPos": {
"h": 4,
"w": 4,
"x": 4,
"y": 1
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "red",
"value": null
},
{
"color": "yellow",
"value": 7
},
{
"color": "green",
"value": 9
}
]
},
"unit": "short",
"mappings": []
},
"overrides": []
},
"options": {
"colorMode": "background",
"graphMode": "area",
"justifyMode": "center",
"orientation": "auto",
"reduceOptions": {
"calcs": ["lastNotNull"],
"fields": "",
"values": false
},
"textMode": "auto"
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "sum(probe_success{job=\"blackbox-api\"})",
"refId": "A",
"legendFormat": "Online"
}
]
},
{
"type": "stat",
"id": 4,
"title": "Infra Online",
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"gridPos": {
"h": 4,
"w": 4,
"x": 8,
"y": 1
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "red",
"value": null
},
{
"color": "yellow",
"value": 4
},
{
"color": "green",
"value": 5
}
]
},
"unit": "short",
"mappings": []
},
"overrides": []
},
"options": {
"colorMode": "background",
"graphMode": "area",
"justifyMode": "center",
"orientation": "auto",
"reduceOptions": {
"calcs": ["lastNotNull"],
"fields": "",
"values": false
},
"textMode": "auto"
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "sum(probe_success{job=\"blackbox-infra\"})",
"refId": "A",
"legendFormat": "Online"
}
]
},
{
"type": "stat",
"id": 5,
"title": "GPU Services Online",
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"gridPos": {
"h": 4,
"w": 4,
"x": 12,
"y": 1
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "red",
"value": null
},
{
"color": "yellow",
"value": 2
},
{
"color": "green",
"value": 4
}
]
},
"unit": "short",
"mappings": []
},
"overrides": []
},
"options": {
"colorMode": "background",
"graphMode": "area",
"justifyMode": "center",
"orientation": "auto",
"reduceOptions": {
"calcs": ["lastNotNull"],
"fields": "",
"values": false
},
"textMode": "auto"
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "sum(probe_success{job=\"blackbox-gpu\"})",
"refId": "A",
"legendFormat": "Online"
}
]
},
{
"type": "stat",
"id": 6,
"title": "Ø Antwortzeit (Web)",
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"gridPos": {
"h": 4,
"w": 4,
"x": 16,
"y": 1
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "yellow",
"value": 2
},
{
"color": "red",
"value": 5
}
]
},
"unit": "s",
"mappings": []
},
"overrides": []
},
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "center",
"orientation": "auto",
"reduceOptions": {
"calcs": ["lastNotNull"],
"fields": "",
"values": false
},
"textMode": "auto"
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "avg(probe_duration_seconds{job=\"blackbox-web\"})",
"refId": "A",
"legendFormat": "Ø"
}
]
},
{
"type": "row",
"id": 10,
"title": "Web Apps",
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 5
}
},
{
"type": "table",
"id": 11,
"title": "Web App Status",
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"gridPos": {
"h": 14,
"w": 12,
"x": 0,
"y": 6
},
"fieldConfig": {
"defaults": {
"custom": {
"align": "left",
"displayMode": "color-background"
},
"mappings": [
{
"type": "value",
"options": {
"0": {
"text": "DOWN",
"color": "red"
},
"1": {
"text": "UP",
"color": "green"
}
}
}
],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "red",
"value": null
},
{
"color": "green",
"value": 1
}
]
}
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "instance"
},
"properties": [
{
"id": "displayName",
"value": "URL"
},
{
"id": "custom.width",
"value": 260
}
]
},
{
"matcher": {
"id": "byName",
"options": "Value"
},
"properties": [
{
"id": "displayName",
"value": "Status"
},
{
"id": "custom.width",
"value": 80
}
]
}
]
},
"options": {
"sortBy": [
{
"displayName": "Status",
"desc": false
}
],
"footer": {
"show": false
}
},
"transformations": [
{
"id": "labelsToFields",
"options": {
"mode": "columns",
"keepLabels": ["instance"]
}
},
{
"id": "organize",
"options": {
"excludeByName": {
"Time": true,
"job": true,
"__name__": true
},
"indexByName": {
"instance": 0,
"Value": 1
}
}
}
],
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "probe_success{job=\"blackbox-web\"}",
"instant": true,
"refId": "A",
"legendFormat": "{{instance}}"
}
]
},
{
"type": "timeseries",
"id": 12,
"title": "Web App Antwortzeiten",
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"gridPos": {
"h": 14,
"w": 12,
"x": 12,
"y": 6
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"lineWidth": 1,
"fillOpacity": 10
},
"unit": "s",
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 5
}
]
}
},
"overrides": []
},
"options": {
"legend": {
"displayMode": "table",
"placement": "bottom",
"calcs": ["lastNotNull", "mean"]
},
"tooltip": {
"mode": "multi"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "probe_duration_seconds{job=\"blackbox-web\"}",
"refId": "A",
"legendFormat": "{{instance}}"
}
]
},
{
"type": "row",
"id": 20,
"title": "API Health Endpoints",
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 20
}
},
{
"type": "table",
"id": 21,
"title": "API Status",
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"gridPos": {
"h": 10,
"w": 12,
"x": 0,
"y": 21
},
"fieldConfig": {
"defaults": {
"custom": {
"align": "left",
"displayMode": "color-background"
},
"mappings": [
{
"type": "value",
"options": {
"0": {
"text": "DOWN",
"color": "red"
},
"1": {
"text": "UP",
"color": "green"
}
}
}
],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "red",
"value": null
},
{
"color": "green",
"value": 1
}
]
}
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "instance"
},
"properties": [
{
"id": "displayName",
"value": "Endpoint"
},
{
"id": "custom.width",
"value": 300
}
]
},
{
"matcher": {
"id": "byName",
"options": "Value"
},
"properties": [
{
"id": "displayName",
"value": "Status"
},
{
"id": "custom.width",
"value": 80
}
]
}
]
},
"options": {
"sortBy": [
{
"displayName": "Status",
"desc": false
}
],
"footer": {
"show": false
}
},
"transformations": [
{
"id": "labelsToFields",
"options": {
"mode": "columns",
"keepLabels": ["instance"]
}
},
{
"id": "organize",
"options": {
"excludeByName": {
"Time": true,
"job": true
},
"indexByName": {
"instance": 0,
"Value": 1
}
}
}
],
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "probe_success{job=\"blackbox-api\"}",
"instant": true,
"refId": "A",
"legendFormat": "{{instance}}"
}
]
},
{
"type": "timeseries",
"id": 22,
"title": "API Uptime-Verlauf (24h)",
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"gridPos": {
"h": 10,
"w": 12,
"x": 12,
"y": 21
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"lineWidth": 2,
"fillOpacity": 20
},
"unit": "short",
"min": 0,
"max": 1,
"mappings": [
{
"type": "value",
"options": {
"0": {
"text": "DOWN"
},
"1": {
"text": "UP"
}
}
}
],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "red",
"value": null
},
{
"color": "green",
"value": 1
}
]
}
},
"overrides": []
},
"options": {
"legend": {
"displayMode": "table",
"placement": "bottom",
"calcs": ["lastNotNull", "mean"]
},
"tooltip": {
"mode": "multi"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "probe_success{job=\"blackbox-api\"}",
"refId": "A",
"legendFormat": "{{instance}}"
}
]
},
{
"type": "row",
"id": 30,
"title": "Infrastruktur, GPU & Interne Dienste",
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 31
}
},
{
"type": "table",
"id": 31,
"title": "Infra-Dienste Status",
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"gridPos": {
"h": 8,
"w": 8,
"x": 0,
"y": 32
},
"fieldConfig": {
"defaults": {
"custom": {
"align": "left",
"displayMode": "color-background"
},
"mappings": [
{
"type": "value",
"options": {
"0": {
"text": "DOWN",
"color": "red"
},
"1": {
"text": "UP",
"color": "green"
}
}
}
],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "red",
"value": null
},
{
"color": "green",
"value": 1
}
]
}
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "instance"
},
"properties": [
{
"id": "displayName",
"value": "Dienst"
},
{
"id": "custom.width",
"value": 220
}
]
},
{
"matcher": {
"id": "byName",
"options": "Value"
},
"properties": [
{
"id": "displayName",
"value": "Status"
}
]
}
]
},
"options": {
"sortBy": [
{
"displayName": "Status",
"desc": false
}
],
"footer": {
"show": false
}
},
"transformations": [
{
"id": "labelsToFields",
"options": {
"mode": "columns",
"keepLabels": ["instance"]
}
},
{
"id": "organize",
"options": {
"excludeByName": {
"Time": true,
"job": true
},
"indexByName": {
"instance": 0,
"Value": 1
}
}
}
],
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "probe_success{job=\"blackbox-infra\"}",
"instant": true,
"refId": "A",
"legendFormat": "{{instance}}"
}
]
},
{
"type": "table",
"id": 32,
"title": "GPU Server Status",
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"gridPos": {
"h": 8,
"w": 8,
"x": 8,
"y": 32
},
"fieldConfig": {
"defaults": {
"custom": {
"align": "left",
"displayMode": "color-background"
},
"mappings": [
{
"type": "value",
"options": {
"0": {
"text": "DOWN",
"color": "red"
},
"1": {
"text": "UP",
"color": "green"
}
}
}
],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "red",
"value": null
},
{
"color": "green",
"value": 1
}
]
}
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "instance"
},
"properties": [
{
"id": "displayName",
"value": "Dienst"
},
{
"id": "custom.width",
"value": 220
}
]
},
{
"matcher": {
"id": "byName",
"options": "Value"
},
"properties": [
{
"id": "displayName",
"value": "Status"
}
]
}
]
},
"options": {
"sortBy": [
{
"displayName": "Status",
"desc": false
}
],
"footer": {
"show": false
}
},
"transformations": [
{
"id": "labelsToFields",
"options": {
"mode": "columns",
"keepLabels": ["instance"]
}
},
{
"id": "organize",
"options": {
"excludeByName": {
"Time": true,
"job": true
},
"indexByName": {
"instance": 0,
"Value": 1
}
}
}
],
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "probe_success{job=\"blackbox-gpu\"}",
"instant": true,
"refId": "A",
"legendFormat": "{{instance}}"
}
]
},
{
"type": "timeseries",
"id": 33,
"title": "Alle Dienste — Uptime-Verlauf",
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"gridPos": {
"h": 8,
"w": 8,
"x": 16,
"y": 32
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"lineWidth": 2,
"fillOpacity": 10
},
"unit": "short",
"min": 0,
"max": 1,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "red",
"value": null
},
{
"color": "green",
"value": 1
}
]
}
},
"overrides": []
},
"options": {
"legend": {
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "multi"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "avg(probe_success{job=\"blackbox-web\"})",
"refId": "A",
"legendFormat": "Web Apps"
},
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "avg(probe_success{job=\"blackbox-api\"})",
"refId": "B",
"legendFormat": "APIs"
},
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "avg(probe_success{job=\"blackbox-infra\"})",
"refId": "C",
"legendFormat": "Infra"
},
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "avg(probe_success{job=\"blackbox-internal\"})",
"refId": "E",
"legendFormat": "Internal"
},
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "avg(probe_success{job=\"blackbox-gpu\"})",
"refId": "D",
"legendFormat": "GPU"
}
]
},
{
"type": "table",
"id": 34,
"title": "Interne Dienste Status",
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"gridPos": {
"h": 6,
"w": 12,
"x": 0,
"y": 40
},
"fieldConfig": {
"defaults": {
"custom": {
"align": "left",
"displayMode": "color-background"
},
"mappings": [
{
"type": "value",
"options": {
"0": {
"text": "DOWN",
"color": "red"
},
"1": {
"text": "UP",
"color": "green"
}
}
}
],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "red",
"value": null
},
{
"color": "green",
"value": 1
}
]
}
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "instance"
},
"properties": [
{
"id": "displayName",
"value": "Dienst"
},
{
"id": "custom.width",
"value": 320
}
]
},
{
"matcher": {
"id": "byName",
"options": "Value"
},
"properties": [
{
"id": "displayName",
"value": "Status"
}
]
}
]
},
"options": {
"sortBy": [
{
"displayName": "Status",
"desc": false
}
],
"footer": {
"show": false
}
},
"transformations": [
{
"id": "labelsToFields",
"options": {
"mode": "columns",
"keepLabels": ["instance"]
}
},
{
"id": "organize",
"options": {
"excludeByName": {
"Time": true,
"job": true
},
"indexByName": {
"instance": 0,
"Value": 1
}
}
}
],
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "probe_success{job=\"blackbox-internal\"}",
"instant": true,
"refId": "A",
"legendFormat": "{{instance}}"
}
]
},
{
"type": "stat",
"id": 35,
"title": "Geocoding Ø Latenz",
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"gridPos": {
"h": 6,
"w": 12,
"x": 12,
"y": 40
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "yellow",
"value": 0.5
},
{
"color": "red",
"value": 2
}
]
},
"unit": "s",
"mappings": []
},
"overrides": []
},
"options": {
"colorMode": "background",
"graphMode": "area",
"justifyMode": "center",
"orientation": "auto",
"reduceOptions": {
"calcs": ["lastNotNull"],
"fields": "",
"values": false
},
"textMode": "auto"
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "avg(probe_duration_seconds{instance=~\".*mana-geocoding.*\"})",
"refId": "A",
"legendFormat": "Ø"
}
]
}
]
}