#!/usr/bin/env sh
# generate-status-page.sh
# Queries VictoriaMetrics and generates a static HTML status page.
# Runs in an Alpine container on the Docker network (http://victoriametrics:9090)
# or on the host (http://localhost:9090).
#
# Data sources:
#   - Service uptime: Blackbox Exporter via VictoriaMetrics
#   - App release tiers: parsed automatically from mana-apps.ts (volume mount)
#
# Output: /output/index.html + /output/status.json

# set -e dropped — too many false positives from expected non-zero exits
# in subshells ($(render_rows ...) under heredoc expansion) and from
# conditional tests inside the while-read tier_apps loop. The script is
# long and best-effort; we accept partial failures over silent exits
# before the jq that writes status.json.
set -u

VM_URL="${VICTORIAMETRICS_URL:-http://victoriametrics:9090}"
OUTPUT="${OUTPUT_FILE:-/output/index.html}"
TMPDIR_LOCAL="$(mktemp -d)"
trap 'rm -rf "$TMPDIR_LOCAL"' EXIT

# ── Fetch data from VictoriaMetrics ─────────────────────────────────────────

# Runs an instant query against ${VM_URL}/api/v1/query and prints the raw
# JSON response on stdout.
#   $1 - query expression
# On any curl failure (non-2xx, timeout, connection error) an empty result
# document is printed instead, so the jq filters below always get valid JSON.
# curl performs the URL-encoding (-G + --data-urlencode), so every character
# of the query is escaped, not just a hand-picked subset.
fetch_metric() {
  curl -sf --max-time 10 -G \
    --data-urlencode "query=$1" \
    "${VM_URL}/api/v1/query" \
    2>/dev/null || echo '{"status":"error","data":{"result":[]}}'
}

SUCCESS_JSON="$(fetch_metric 'probe_success{job=~"blackbox-web|blackbox-api|blackbox-infra|blackbox-internal|blackbox-gpu"}')"
DURATION_JSON="$(fetch_metric 'probe_duration_seconds{job=~"blackbox-web|blackbox-api|blackbox-infra|blackbox-internal|blackbox-gpu"}')"

# ── Helper functions ────────────────────────────────────────────────────────

# Prints the probe_success sample value for one instance.
#   $1 - instance label to look up in SUCCESS_JSON
# Prints nothing when the instance is absent, and "0" when jq itself fails.
get_success() {
  instance="$1"
  echo "$SUCCESS_JSON" | jq -r --arg inst "$instance" \
    '.data.result[] | select(.metric.instance == $inst) | .value[1]' 2>/dev/null || echo "0"
}

# Prints the probe duration of one instance in whole milliseconds.
#   $1 - instance label to look up in DURATION_JSON
# Prints an empty line when there is no sample for the instance.
get_duration_ms() {
  instance="$1"
  val=$(echo "$DURATION_JSON" | jq -r --arg inst "$instance" \
    '.data.result[] | select(.metric.instance == $inst) | .value[1]' 2>/dev/null || echo "")
  if [ -n "$val" ] && [ "$val" != "null" ]; then
    # The sample is in seconds; the multiplication has to happen inside awk.
    # Only the first line is used in case several series match the instance.
    printf '%s\n' "$val" | awk 'NR == 1 { printf "%.0f", $1 * 1000 }'
  else
    echo ""
  fi
}

# Prints all instance labels of one job, one per line, sorted.
#   $1 - job label
get_instances() {
  job="$1"
  echo "$SUCCESS_JSON" | jq -r --arg job "$job" \
    '.data.result[] | select(.metric.job == $job) | .metric.instance' 2>/dev/null | sort
}

# Derives a display name from a probe URL.
#   $1 - probe URL (instance label)
friendly_name() {
  url="$1"
  # Strip the https:// or http:// scheme.
  name="${url#https://}"
  name="${name#http://}"
  # Internal services (Docker network): mana-geocoding:3018/health → Mana Geocoding
  case "$name" in
    mana-geocoding:*/health/photon-self) name="Photon-Self (via Geocoding)" ;;
    mana-geocoding:*) name="Mana Geocoding" ;;
    mana-ai:*) name="Mana AI Runner" ;;
    mana.how/*) name="${name#mana.how/}" ;;
    *)
      # Subdomain-based: chat.mana.how → chat
      name="${name%.mana.how}"
      ;;
  esac
  # Drop a trailing /health.
  name="${name%/health}"
  # mana.how (no route) → Mana
  [ "$name" = "mana.how" ] && name="Mana"
  # Upper-case the first character (POSIX-compatible).
  printf '%s' "$name" | awk '{print toupper(substr($0,1,1)) substr($0,2)}'
}

# Prints the sum of the probe_success values of one job (number of UP
# services); "0" when jq fails.
#   $1 - job label
count_up() {
  job="$1"
  echo "$SUCCESS_JSON" | jq -r --arg job "$job" \
    '[.data.result[] | select(.metric.job == $job) | .value[1]] | map(tonumber) | add // 0' \
    2>/dev/null || echo "0"
}

# Prints the number of series of one job; "0" when jq fails.
#   $1 - job label
count_total() {
  job="$1"
  echo "$SUCCESS_JSON" | jq -r --arg job "$job" \
    '[.data.result[] | select(.metric.job == $job)] | length' \
    2>/dev/null || echo "0"
}

# ── Service rows ────────────────────────────────────────────────────────────

# Prints one row per instance of a job, or a placeholder line when the job
# has no instances yet.
#   $1 - job label
# Calls get_tier_badge, which is defined further down in the file; that works
# because render_rows is only invoked after that definition has been read.
render_rows() {
  job="$1"
  instances="$(get_instances "$job")"
  if [ -z "$instances" ]; then
    printf 'Noch keine Daten — Blackbox Exporter lädt…\n'
    return
  fi
  echo "$instances" | while IFS= read -r inst; do
    [ -z "$inst" ] && continue
    success="$(get_success "$inst")"
    ms="$(get_duration_ms "$inst")"
    name="$(friendly_name "$inst")"
    tier_badge="$(get_tier_badge "$inst")"
    if [ "$success" = "1" ]; then
      status_class="up"
      status_text="UP"
      ms_html="${ms:+${ms}ms}"
    else
      status_class="down"
      status_text="DOWN"
      ms_html=""
    fi
    tier_html=""
    [ -n "$tier_badge" ] && tier_html=" $tier_badge"
    # NOTE(review): the format has 5 conversions but 7 arguments are passed,
    # so printf re-applies the format to the last two — confirm that no markup
    # was lost from this format string.
    printf '%s%s%s%s %s\n' \
      "$status_class" "$status_class" "$name" "$tier_html" "$inst" \
      "$status_text" \
      "${ms_html:-}"
  done
}

# ── Overall status ──────────────────────────────────────────────────────────
web_up="$(count_up blackbox-web)"; web_total="$(count_total blackbox-web)"
api_up="$(count_up blackbox-api)"; api_total="$(count_total blackbox-api)"
internal_up="$(count_up blackbox-internal)"; internal_total="$(count_total blackbox-internal)"
infra_up="$(count_up blackbox-infra)"; infra_total="$(count_total blackbox-infra)"
gpu_up="$(count_up blackbox-gpu)"; gpu_total="$(count_total blackbox-gpu)"

total_up=$(( web_up + api_up + internal_up + infra_up + gpu_up ))
total_all=$(( web_total + api_total + internal_total + infra_total + gpu_total ))
total_down=$(( total_all - total_up ))

# all-good: nothing down and at least one service known;
# partial:  more than half of the services are up;
# outage:   everything else (this includes the "no data at all" case).
# NOTE(review): overall_* and TIMESTAMP are not referenced anywhere else in
# the visible script — confirm they are still needed by the page template.
if [ "$total_down" -eq 0 ] && [ "$total_all" -gt 0 ]; then
  overall_class="all-good"
  overall_icon="✓"
  overall_text="Alle Systeme operational"
elif [ "$total_up" -gt $(( total_all / 2 )) ]; then
  overall_class="partial"
  overall_icon="⚠"
  overall_text="Teilweise Beeinträchtigungen (${total_down} Dienste down)"
else
  overall_class="outage"
  overall_icon="✕"
  overall_text="Größerer Ausfall (${total_down} von ${total_all} Diensten down)"
fi

TIMESTAMP="$(date -u '+%d. %B %Y, %H:%M Uhr UTC')"

# ── App release tiers (parsed automatically from mana-apps.ts) ──────────────
# Mounted as /mana-apps.ts (read-only) in the container.
# Line format: id|name|tier|status
MANA_APPS_TS="${MANA_APPS_TS:-/mana-apps.ts}"

# The awk program scans the body of `export const MANA_APPS` up to the closing
# `];`. Each object opened by a tab-indented `{` resets the collected fields;
# the id/name/requiredTier/status lines are reduced to the text between their
# first pair of single quotes; on the closing `},` one line is printed unless
# the entry has no id, no tier, or is marked `archived: true`.
if [ -f "$MANA_APPS_TS" ]; then
  TIER_APPS="$(awk '
    /^export const MANA_APPS/ { inside=1; next }
    /^];/ { inside=0 }
    !inside { next }
    /^\t\{/ { id=""; name=""; tier=""; st=""; archived=0 }
    /^\t\tid:/ { gsub(/.*id:[[:space:]]*'\''/, ""); gsub(/'\''.*/, ""); id=$0 }
    /^\t\tname:/ { gsub(/.*name:[[:space:]]*'\''/, ""); gsub(/'\''.*/, ""); name=$0 }
    /^\t\trequiredTier:/ { gsub(/.*requiredTier:[[:space:]]*'\''/, ""); gsub(/'\''.*/, ""); tier=$0 }
    /^\t\tstatus:/ { gsub(/.*status:[[:space:]]*'\''/, ""); gsub(/'\''.*/, ""); st=$0 }
    /^\t\tarchived:[[:space:]]*true/ { archived=1 }
    /^\t\},/ { if (id != "" && tier != "" && archived == 0) print id "|" name "|" tier "|" st }
  ' "$MANA_APPS_TS")"
else
  echo "$(date '+%H:%M:%S') WARNUNG: $MANA_APPS_TS nicht gefunden — Tier-Badges deaktiviert"
  TIER_APPS=""
fi

# Prints the tier badge for a Blackbox probe URL (nothing when no app matches).
#   $1 - probe URL (instance label)
# Reads the id|name|tier|status lines from TIER_APPS.
get_tier_badge() {
  url="$1"
  raw="${url#https://}"
  # Route-based URLs: mana.how/chat → chat
  case "$raw" in
    mana.how/*) appid="${raw#mana.how/}" ;;
    *)
      # Subdomain-based: todo.mana.how → todo
      appid="${raw%.mana.how*}"
      appid="${appid%/health}"
      ;;
  esac
  # Skip API subdomains (e.g. todo-api, chat-api).
  case "$appid" in
    *-api) return ;;
  esac
  # Aliases (special cases → current app ids).
  case "$appid" in
    mana.how) appid="mana" ;;
    manadeck) appid="cards" ;;
  esac
  echo "$TIER_APPS" | while IFS='|' read -r id name tier st; do
    [ "$id" = "$appid" ] || continue
    # NOTE(review): the format has a single %s but two arguments, so the tier
    # is printed twice back to back — confirm no markup was lost here.
    printf '%s' "$tier" "$tier"
    break
  done
}

# Converts the id|name|tier|status lines in TIER_APPS into a compact JSON
# array of {id,name,tier,status} objects on stdout; prints "[]" when jq fails.
# jq does the string encoding, so quotes or backslashes in a field cannot
# produce invalid JSON, and no temp file is needed to carry state out of a
# pipeline subshell. Lines with an empty id are skipped; the status field
# receives everything after the third separator.
# The jq program deliberately contains no double quotes (the separator comes
# in via --arg): an earlier revision of this script reported ash parsing
# problems with escaped double quotes in single-quoted programs inside $().
build_tier_json() {
  printf '%s\n' "${TIER_APPS:-}" | jq -R -n -c --arg sep '|' '
    [ inputs
      | select(length > 0)
      | split($sep)
      | select((.[0] | length) > 0)
      | { id: .[0],
          name: (.[1:2] | join($sep)),
          tier: (.[2:3] | join($sep)),
          status: (.[3:] | join($sep)) } ]
  ' 2>/dev/null || printf '[]'
}

# ── Generate HTML ───────────────────────────────────────────────────────────
# Unquoted delimiter: variables and the $(render_rows …) calls are expanded
# while the template is written. The page goes to a temp name first and is
# then moved into place.
cat > "${OUTPUT}.tmp" << HTMLEOF
Mana Status
${web_up}/${web_total}
Web Apps
${api_up}/${api_total}
API Backends
${internal_up}/${internal_total}
Interne
${infra_up}/${infra_total}
Infrastruktur
${gpu_up}/${gpu_total}
GPU Dienste

Web Apps

${web_up} von ${web_total} online
$(render_rows blackbox-web)

API Backends

${api_up} von ${api_total} online
$(render_rows blackbox-api)

Interne Dienste

${internal_up} von ${internal_total} online
$(render_rows blackbox-internal)

Infrastruktur

${infra_up} von ${infra_total} online
$(render_rows blackbox-infra)

GPU Dienste

${gpu_up} von ${gpu_total} online
$(render_rows blackbox-gpu)
HTMLEOF
mv "${OUTPUT}.tmp" "$OUTPUT"

echo "$(date '+%H:%M:%S') Status-Seite generiert → $OUTPUT (${total_up}/${total_all} online)"

# ── status.json for the ManaScore live badge ────────────────────────────────
JSON_OUTPUT="$(dirname "$OUTPUT")/status.json"
TIMESTAMP_ISO="$(date -u '+%Y-%m-%dT%H:%M:%SZ')"

# Tier data as a JSON array for jq's --argjson; falls back to an empty array.
TIER_JSON="[]"
if [ -n "${TIER_APPS:-}" ]; then
  TIER_JSON="$(build_tier_json)"
  [ -n "$TIER_JSON" ] || TIER_JSON="[]"
fi

# services: instance URL reduced to an app key → true when probe_success is "1".
# tiers:    tier → list of {name, status}.
# The success line is only logged when both jq and the final mv succeeded.
if echo "$SUCCESS_JSON" | jq \
  --arg ts "$TIMESTAMP_ISO" \
  --argjson total_up "$total_up" \
  --argjson total_all "$total_all" \
  --argjson tiers "$TIER_JSON" \
  '{
    updated: $ts,
    summary: { up: $total_up, total: $total_all },
    services: (
      .data.result | map({
        key: (
          .metric.instance
          | ltrimstr("https://")
          | rtrimstr("/health")
          | rtrimstr("/")
          | if . == "mana.how" then "mana"
            elif startswith("mana.how/") then ltrimstr("mana.how/")
            else rtrimstr(".mana.how")
            end
        ),
        value: (.value[1] == "1")
      }) | from_entries
    ),
    tiers: ($tiers | group_by(.tier) | map({ key: .[0].tier, value: map({name, status}) }) | from_entries)
  }' > "${JSON_OUTPUT}.tmp" && mv "${JSON_OUTPUT}.tmp" "$JSON_OUTPUT"; then
  echo "$(date '+%H:%M:%S') status.json generiert → $JSON_OUTPUT"
else
  rm -f "${JSON_OUTPUT}.tmp"
  echo "$(date '+%H:%M:%S') WARNUNG: status.json konnte nicht generiert werden" >&2
fi