From e8de377cfe1360a91667aec9feac2955d06736e0 Mon Sep 17 00:00:00 2001 From: Till JS Date: Wed, 8 Apr 2026 17:25:48 +0200 Subject: [PATCH] fix(macmini): mount prometheus config directly so /-/reload picks up edits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit VictoriaMetrics + vmalert previously copied prometheus.yml/alerts.yml from /mnt/prometheus-config/ into /etc/prometheus/ at container start. The copy silently drifted from the host file whenever the container wasn't restarted — which is exactly what hid the matrix/element removal from status.mana.how until 2026-04-08, when VM was still actively scraping the deleted targets because its in-container config snapshot pre-dated the cleanup. Now both containers mount ./docker/prometheus directly into /etc/prometheus (resp. /etc/alerts) read-only and point the binary at it, and deploy.sh issues POST /-/reload to both after each deploy so config edits go live without a container recreate. Co-Authored-By: Claude Opus 4.6 (1M context) --- docker-compose.macmini.yml | 15 +++++++++++---- scripts/mac-mini/deploy.sh | 12 ++++++++++++ 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/docker-compose.macmini.yml b/docker-compose.macmini.yml index 2d32c388b..ed2ae3d0f 100644 --- a/docker-compose.macmini.yml +++ b/docker-compose.macmini.yml @@ -1095,9 +1095,14 @@ services: container_name: mana-mon-victoria restart: always mem_limit: 256m - entrypoint: ["sh", "-c", "mkdir -p /etc/prometheus && cp /mnt/prometheus-config/*.yml /etc/prometheus/ 2>/dev/null; exec /victoria-metrics-prod -storageDataPath=/storage -retentionPeriod=2y -httpListenAddr=:9090 -promscrape.config=/etc/prometheus/prometheus.yml -promscrape.config.strictParse=false -selfScrapeInterval=15s -search.latencyOffset=0s"] + # Mount the host config dir read-only and point promscrape directly at it, + # so edits to docker/prometheus/prometheus.yml are picked up by POST /-/reload + # without a container restart. 
The previous setup baked a copy into + # /etc/prometheus/ at startup, which silently drifted from the host file + # whenever the container wasn't restarted (matrix removal incident, 2026-04-08). + entrypoint: ["/victoria-metrics-prod", "-storageDataPath=/storage", "-retentionPeriod=2y", "-httpListenAddr=:9090", "-promscrape.config=/etc/prometheus/prometheus.yml", "-promscrape.config.strictParse=false", "-selfScrapeInterval=15s", "-search.latencyOffset=0s"] volumes: - - ./docker/prometheus:/mnt/prometheus-config:ro + - ./docker/prometheus:/etc/prometheus:ro - victoriametrics_data:/storage ports: - "9090:9090" @@ -1302,9 +1307,11 @@ services: condition: service_healthy alertmanager: condition: service_healthy - entrypoint: ["sh", "-c", "mkdir -p /etc/alerts && cp /mnt/alerts-config/alerts.yml /etc/alerts/ 2>/dev/null; exec /vmalert-prod -datasource.url=http://victoriametrics:9090 -notifier.url=http://alertmanager:9093 -remoteWrite.url=http://victoriametrics:9090 -remoteRead.url=http://victoriametrics:9090 -rule='/etc/alerts/alerts.yml' -evaluationInterval=30s -httpListenAddr=:8880"] + # Same direct-mount pattern as victoriametrics above — see the comment + # there for the rationale. 
+ entrypoint: ["/vmalert-prod", "-datasource.url=http://victoriametrics:9090", "-notifier.url=http://alertmanager:9093", "-remoteWrite.url=http://victoriametrics:9090", "-remoteRead.url=http://victoriametrics:9090", "-rule=/etc/alerts/alerts.yml", "-evaluationInterval=30s", "-httpListenAddr=:8880"] volumes: - - ./docker/prometheus:/mnt/alerts-config:ro + - ./docker/prometheus:/etc/alerts:ro ports: - "8880:8880" healthcheck: diff --git a/scripts/mac-mini/deploy.sh b/scripts/mac-mini/deploy.sh index cadc77aa1..c5ad97bcd 100755 --- a/scripts/mac-mini/deploy.sh +++ b/scripts/mac-mini/deploy.sh @@ -105,6 +105,18 @@ check_health "Clock Web" "http://localhost:5013/health" check_health "Contacts Backend" "http://localhost:3034/health" check_health "Contacts Web" "http://localhost:5014/health" +echo "" +echo "=== Reloading monitoring configs ===" +# Bind-mounted prometheus.yml/alerts.yml are read live from docker/prometheus/, +# but a running VM/vmalert only picks up edits on an explicit reload. Target +# 127.0.0.1: 0.0.0.0 is a bind address, not a portable connect destination. +docker exec mana-mon-victoria wget -qO- --post-data= http://127.0.0.1:9090/-/reload \ + && echo " victoriametrics: reloaded" \ + || echo " victoriametrics: reload failed (container down?)" +docker exec mana-mon-vmalert wget -qO- --post-data= http://127.0.0.1:8880/-/reload \ + && echo " vmalert: reloaded" \ + || echo " vmalert: reload failed (container down?)" + echo "" echo "=== Deployment Complete ===" echo ""