mirror of
https://github.com/Memo-2023/mana-monorepo.git
synced 2026-05-14 17:41:09 +02:00
feat(monitoring): add mana-geocoding + Pelias to prod compose, Prometheus, Grafana, and status.mana.how
Production deployment + observability for the self-hosted geocoding stack: **docker-compose.macmini.yml** - New mana-geocoding container (port 3018, internal-only — no traefik labels, no Cloudflare route). Uses host.docker.internal to reach the Pelias API on the host's pelias compose stack. Dockerfile added under services/mana-geocoding/ using the same Bun/Hono pattern as mana-events. **Prometheus** - New blackbox-internal job probing mana-geocoding:3018/health, the Pelias API on host.docker.internal:4000/v1/status, and Elasticsearch at host.docker.internal:9200/_cluster/health. Kept separate from blackbox-api which is reserved for public HTTPS endpoints. **status.mana.how (generate-status-page.sh)** - Include blackbox-internal in the metric query and add an "Interne Dienste" section with its own summary card, right between API Backends and Infrastruktur. Summary grid goes from 4 to 5 columns with a 900px breakpoint. - friendly_name() now handles http:// URLs and rewrites container-name hosts like mana-geocoding:3018/health → "Mana Geocoding", host.docker.internal:4000 → "Pelias API", host.docker.internal:9200 → "Pelias Elasticsearch". **Grafana uptime dashboard** - Add an "Internal" series to the "Alle Dienste — Uptime-Verlauf" panel - New "Interne Dienste Status" table panel showing per-instance up/down - New "Geocoding Ø Latenz" stat panel for probe_duration_seconds Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
0ba97672b1
commit
957060ca55
5 changed files with 949 additions and 155 deletions
|
|
@ -362,6 +362,40 @@ services:
|
|||
- "traefik.http.routers.mana-events.tls=true"
|
||||
- "traefik.http.services.mana-events.loadbalancer.server.port=3065"
|
||||
|
||||
# ─── Geocoding ───────────────────────────────────────────
|
||||
# Thin Hono wrapper in front of a self-hosted Pelias stack.
|
||||
# Pelias itself (elasticsearch + api + libpostal) runs from a separate
|
||||
# compose file in services/mana-geocoding/pelias/ — see
|
||||
# services/mana-geocoding/CLAUDE.md for the initial import procedure.
|
||||
# Internal-only: no traefik labels, not exposed via Cloudflare.
|
||||
mana-geocoding:
  build:
    context: services/mana-geocoding
    dockerfile: Dockerfile
  image: mana-geocoding:local
  container_name: mana-geocoding
  restart: always
  mem_limit: 128m
  # Pelias runs on host network via its own compose, so the wrapper
  # reaches it via host.docker.internal (Pelias API at :4000).
  extra_hosts:
    - "host.docker.internal:host-gateway"
  environment:
    TZ: Europe/Berlin
    # Quoted like the other numeric settings below so every env value
    # is an explicit string.
    PORT: "3018"
    PELIAS_API_URL: http://host.docker.internal:4000/v1
    # Comma-separated list of allowed origins.
    CORS_ORIGINS: https://mana.how,http://localhost:5173
    CACHE_MAX_ENTRIES: "5000"
    # 86400000 ms = 24 h.
    CACHE_TTL_MS: "86400000"
  # NOTE(review): this publishes 3018 on the host. If "internal-only" is
  # meant to exclude other machines on the LAN as well, consider binding to
  # "127.0.0.1:3018:3018" — confirm nothing outside Docker/the host needs it.
  ports:
    - "3018:3018"
  healthcheck:
    # Inline Bun script: GET /health on loopback, exit 0 only when the
    # response is ok, exit 1 on a non-ok response or a failed fetch.
    test: ["CMD", "bun", "-e", "fetch('http://127.0.0.1:3018/health').then(r=>process.exit(r.ok?0:1)).catch(()=>process.exit(1))"]
    interval: 120s
    timeout: 10s
    retries: 3
    start_period: 15s
|
||||
|
||||
mana-user:
|
||||
build:
|
||||
context: services/mana-user
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -283,6 +283,27 @@ scrape_configs:
|
|||
- target_label: __address__
|
||||
replacement: blackbox-exporter:9115
|
||||
|
||||
# Probes for services that have no Cloudflare route.
# The blackbox exporter performs the requests itself: the geocoding wrapper
# is addressed by its container name on the Docker network, the Pelias
# stack through host.docker.internal.
# NOTE(review): http_2xx only looks at the HTTP status code — confirm that
# /_cluster/health actually answers non-2xx when Elasticsearch is unhealthy.
- job_name: "blackbox-internal"
  metrics_path: /probe
  params:
    module:
      - http_2xx
  static_configs:
    - targets:
        - http://mana-geocoding:3018/health
        # Pelias stack runs on host network, reached via host gateway
        - http://host.docker.internal:4000/v1/status
        - http://host.docker.internal:9200/_cluster/health
  relabel_configs:
    # Hand the configured target URL to the exporter as the `target` parameter.
    - target_label: __param_target
      source_labels:
        - __address__
    # Keep that URL as the `instance` label on the resulting series.
    - target_label: instance
      source_labels:
        - __param_target
    # Send the actual scrape to the blackbox exporter.
    - replacement: blackbox-exporter:9115
      target_label: __address__
|
||||
|
||||
# Infrastructure & Monitoring Tools
|
||||
- job_name: 'blackbox-infra'
|
||||
metrics_path: /probe
|
||||
|
|
|
|||
|
|
@ -25,8 +25,8 @@ fetch_metric() {
|
|||
2>/dev/null || echo '{"status":"error","data":{"result":[]}}'
|
||||
}
|
||||
|
||||
SUCCESS_JSON="$(fetch_metric 'probe_success{job=~"blackbox-web|blackbox-api|blackbox-infra|blackbox-gpu"}')"
|
||||
DURATION_JSON="$(fetch_metric 'probe_duration_seconds{job=~"blackbox-web|blackbox-api|blackbox-infra|blackbox-gpu"}')"
|
||||
SUCCESS_JSON="$(fetch_metric 'probe_success{job=~"blackbox-web|blackbox-api|blackbox-infra|blackbox-internal|blackbox-gpu"}')"
|
||||
DURATION_JSON="$(fetch_metric 'probe_duration_seconds{job=~"blackbox-web|blackbox-api|blackbox-infra|blackbox-internal|blackbox-gpu"}')"
|
||||
|
||||
# ── Hilfsfunktionen ─────────────────────────────────────────────────────────
|
||||
|
||||
|
|
@ -59,10 +59,20 @@ get_instances() {
|
|||
# Freundlicher Name aus URL
|
||||
friendly_name() {
|
||||
url="$1"
|
||||
# Entferne https://
|
||||
# Entferne https:// oder http://
|
||||
name="${url#https://}"
|
||||
# Route-basierte URLs: mana.how/chat → Chat
|
||||
name="${name#http://}"
|
||||
# Interne Services (Docker-Netz): mana-geocoding:3018/health → Mana Geocoding
|
||||
case "$name" in
|
||||
mana-geocoding:*)
|
||||
name="Mana Geocoding"
|
||||
;;
|
||||
host.docker.internal:4000*)
|
||||
name="Pelias API"
|
||||
;;
|
||||
host.docker.internal:9200*)
|
||||
name="Pelias Elasticsearch"
|
||||
;;
|
||||
mana.how/*)
|
||||
name="${name#mana.how/}"
|
||||
;;
|
||||
|
|
@ -71,8 +81,10 @@ friendly_name() {
|
|||
name="${name%.mana.how}"
|
||||
;;
|
||||
esac
|
||||
# Entferne /health suffix
|
||||
# Entferne /health, /_cluster/health, /v1/status suffixe
|
||||
name="${name%/health}"
|
||||
name="${name%/_cluster/health}"
|
||||
name="${name%/v1/status}"
|
||||
# mana.how (ohne Route) → Mana
|
||||
[ "$name" = "mana.how" ] && name="Mana"
|
||||
# Erster Buchstabe groß (POSIX-kompatibel)
|
||||
|
|
@ -131,11 +143,12 @@ render_rows() {
|
|||
|
||||
web_up="$(count_up blackbox-web)"; web_total="$(count_total blackbox-web)"
|
||||
api_up="$(count_up blackbox-api)"; api_total="$(count_total blackbox-api)"
|
||||
internal_up="$(count_up blackbox-internal)"; internal_total="$(count_total blackbox-internal)"
|
||||
infra_up="$(count_up blackbox-infra)"; infra_total="$(count_total blackbox-infra)"
|
||||
gpu_up="$(count_up blackbox-gpu)"; gpu_total="$(count_total blackbox-gpu)"
|
||||
|
||||
total_up=$(( web_up + api_up + infra_up + gpu_up ))
|
||||
total_all=$(( web_total + api_total + infra_total + gpu_total ))
|
||||
total_up=$(( web_up + api_up + internal_up + infra_up + gpu_up ))
|
||||
total_all=$(( web_total + api_total + internal_total + infra_total + gpu_total ))
|
||||
total_down=$(( total_all - total_up ))
|
||||
|
||||
if [ "$total_down" -eq 0 ] && [ "$total_all" -gt 0 ]; then
|
||||
|
|
@ -276,10 +289,11 @@ cat > "${OUTPUT}.tmp" << HTMLEOF
|
|||
/* ── Summary Row ── */
|
||||
.summary {
|
||||
display: grid;
|
||||
grid-template-columns: repeat(4, 1fr);
|
||||
grid-template-columns: repeat(5, 1fr);
|
||||
gap: 12px;
|
||||
margin-bottom: 32px;
|
||||
}
|
||||
@media (max-width: 900px) { .summary { grid-template-columns: repeat(3, 1fr); } }
|
||||
@media (max-width: 600px) { .summary { grid-template-columns: repeat(2, 1fr); } }
|
||||
|
||||
.summary-card {
|
||||
|
|
@ -385,6 +399,10 @@ cat > "${OUTPUT}.tmp" << HTMLEOF
|
|||
<div class="count $([ "$api_up" -eq "$api_total" ] && echo green || echo yellow)">${api_up}/${api_total}</div>
|
||||
<div class="label">API Backends</div>
|
||||
</div>
|
||||
<div class="summary-card">
|
||||
<div class="count $([ "$internal_up" -eq "$internal_total" ] && echo green || echo yellow)">${internal_up}/${internal_total}</div>
|
||||
<div class="label">Interne</div>
|
||||
</div>
|
||||
<div class="summary-card">
|
||||
<div class="count $([ "$infra_up" -eq "$infra_total" ] && echo green || echo yellow)">${infra_up}/${infra_total}</div>
|
||||
<div class="label">Infrastruktur</div>
|
||||
|
|
@ -415,6 +433,16 @@ $(render_rows blackbox-api)
|
|||
</table>
|
||||
</div>
|
||||
|
||||
<div class="section">
|
||||
<div class="section-header">
|
||||
<h2>Interne Dienste</h2>
|
||||
<span class="fraction">${internal_up} von ${internal_total} online</span>
|
||||
</div>
|
||||
<table>
|
||||
$(render_rows blackbox-internal)
|
||||
</table>
|
||||
</div>
|
||||
|
||||
<div class="section">
|
||||
<div class="section-header">
|
||||
<h2>Infrastruktur</h2>
|
||||
|
|
|
|||
16
services/mana-geocoding/Dockerfile
Normal file
16
services/mana-geocoding/Dockerfile
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
# Image for the mana-geocoding service; the TypeScript entry point is run
# directly by Bun.
FROM oven/bun:1 AS production

WORKDIR /app

# Manifests first, then install. The lockfile is matched with a glob
# (bun.lock*) — presumably so the build tolerates a missing lockfile.
COPY package.json bun.lock* ./
# Attempt a frozen-lockfile install with stderr discarded; if that fails,
# fall back to a plain install.
RUN bun install --frozen-lockfile 2>/dev/null \
    || bun install

# Application sources and compiler config.
COPY tsconfig.json ./
COPY src ./src

EXPOSE 3018

# Inline Bun script: GET /health on localhost, exit 0 only when the response
# is ok, exit 1 otherwise or when the fetch fails.
HEALTHCHECK --interval=30s --timeout=10s --start-period=10s --retries=3 \
    CMD bun -e "fetch('http://localhost:3018/health').then(r => process.exit(r.ok ? 0 : 1)).catch(() => process.exit(1))"

CMD ["bun", "run", "src/index.ts"]
|
||||
Loading…
Add table
Add a link
Reference in a new issue