mirror of
https://github.com/Memo-2023/mana-monorepo.git
synced 2026-05-14 17:41:09 +02:00
chore(infra): drop migrated services from Mini compose + tunnel config
Phase 2c+2d cleanup. The 14 services that moved to the GPU-Box stack (grafana, victoriametrics, loki, tempo, promtail, alertmanager, vmalert, pushgateway, blackbox-exporter, alert-notifier, umami, glitchtip + worker, forgejo) are now stopped on the Mini and stable on the GPU box, so the rollback insurance can come out:

- docker-compose.macmini.yml: drop 14 service blocks (-369 lines) + the now-orphan named volumes (victoriametrics_data, loki_data, alertmanager_data, grafana_data, tempo_data).
- cloudflared-config.yml: drop the four hostnames whose DNS already points at the mana-gpu-server tunnel — Mini-tunnel ingress for them has been dead routing since 2026-05-06, removing the rules just makes the file match reality. The hostnames now live in the GPU tunnel's dashboard config (token-managed).

Containers + volumes stay on the Mini for now; running `docker compose -f docker-compose.macmini.yml --env-file .env.macmini up -d --remove-orphans` on the box drops them in one go when ready.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
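For reference, a minimal cutover-check sketch (not part of the commit): the hostnames, the `mana-` container-name prefix, and the volume names are taken from the config removed below; everything else is an assumption about the operator's shell on the Mini.

```sh
# Confirm the migrated hostnames already resolve via the GPU tunnel,
# i.e. the Mini-tunnel ingress rules dropped here really are dead routing.
dig +short grafana.mana.how CNAME
dig +short git.mana.how CNAME

# The migrated containers still exist on the Mini (stopped); list everything
# with the mana- name prefix used in the removed compose blocks. The
# mana-mon-* and mana-core-forgejo entries should show as Exited.
docker ps -a --filter "name=mana-" --format "{{.Names}}\t{{.Status}}"

# When ready, drop the stopped containers in one go (command from the commit
# message above).
docker compose -f docker-compose.macmini.yml --env-file .env.macmini up -d --remove-orphans

# Optionally reclaim the now-orphan named volumes afterwards (destructive —
# only once the GPU-box stack is confirmed stable); names taken from the
# removed volumes: block.
docker volume rm mana-victoria-data mana-loki-data mana-alertmanager-data \
  mana-grafana-data mana-tempo-data
```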
This commit is contained in:
parent f422fd6779
commit 0db64cb47b
2 changed files with 0 additions and 387 deletions
cloudflared-config.yml

@@ -132,8 +132,6 @@ ingress:
  # ============================================
  # Forgejo (Git + CI/CD)
  # ============================================
  - hostname: git.mana.how
    service: http://localhost:3041

  # ============================================
  # Standalone microservices
@@ -215,12 +213,6 @@ ingress:
  # ============================================
  # Monitoring & observability
  # ============================================
  - hostname: grafana.mana.how
    service: http://localhost:8000
  - hostname: stats.mana.how
    service: http://localhost:8010
  - hostname: glitchtip.mana.how
    service: http://localhost:8020

  # ============================================
  # GPU services (NOT in this tunnel)

docker-compose.macmini.yml

@@ -202,56 +202,6 @@ services:
  # Tier 0b: Forgejo (Git + CI/CD + Registry)
  # ============================================

  forgejo:
    image: codeberg.org/forgejo/forgejo:11
    container_name: mana-core-forgejo
    restart: always
    mem_limit: 512m
    depends_on:
      postgres:
        condition: service_healthy
    environment:
      USER_UID: 1000
      USER_GID: 1000
      FORGEJO__database__DB_TYPE: postgres
      FORGEJO__database__HOST: postgres:5432
      FORGEJO__database__NAME: forgejo
      FORGEJO__database__USER: postgres
      FORGEJO__database__PASSWD: ${POSTGRES_PASSWORD:-mana123}
      FORGEJO__server__DOMAIN: git.mana.how
      FORGEJO__server__SSH_DOMAIN: git.mana.how
      FORGEJO__server__ROOT_URL: https://git.mana.how/
      FORGEJO__server__HTTP_PORT: 3000
      FORGEJO__server__SSH_PORT: 2222
      FORGEJO__server__LFS_START_SERVER: "true"
      FORGEJO__service__DISABLE_REGISTRATION: "true"
      FORGEJO__service__REQUIRE_SIGNIN_VIEW: "false"
      FORGEJO__actions__ENABLED: "true"
      FORGEJO__actions__DEFAULT_ACTIONS_URL: https://code.forgejo.org
      FORGEJO__packages__ENABLED: "true"
      FORGEJO__ui__DEFAULT_THEME: forgejo-dark
      FORGEJO__ui__SHOW_USER_EMAIL: "false"
      FORGEJO__mailer__ENABLED: "false"
    volumes:
      - /Volumes/ManaData/forgejo:/data
    ports:
      - "3041:3000"
      - "2222:22"
    healthcheck:
      test: ["CMD", "wget", "-q", "--spider", "http://localhost:3000/api/v1/version"]
      interval: 120s
      timeout: 10s
      retries: 3
      start_period: 30s

  # Forgejo runner removed — no macOS binary exists, Docker-based runner
  # can't access host filesystem/SSH for CD. GitHub CD handles deployment
  # via native self-hosted runner. Forgejo is kept as a mirror only.

  # ============================================
  # Tier 1: Core Auth Service (Port 3001)
  # ============================================

  mana-auth:
    build:
      context: .

@@ -1281,163 +1231,6 @@ services:
  # Tier 7: Monitoring Dashboards (Ports 8000-8099)
  # ============================================

  grafana:
    image: grafana/grafana:10.4.1
    container_name: mana-mon-grafana
    restart: always
    mem_limit: 192m
    depends_on:
      victoriametrics:
        condition: service_healthy
    environment:
      GF_SECURITY_ADMIN_USER: admin
      GF_SECURITY_ADMIN_PASSWORD: ${GRAFANA_PASSWORD:-admin}
      GF_USERS_ALLOW_SIGN_UP: false
      GF_AUTH_ANONYMOUS_ENABLED: true
      GF_AUTH_ANONYMOUS_ORG_ROLE: Viewer
      GF_SERVER_ROOT_URL: https://grafana.mana.how
      GF_SERVER_HTTP_PORT: 8000
      GF_INSTALL_PLUGINS: yesoreyeram-infinity-datasource
      GF_DASHBOARDS_DEFAULT_HOME_DASHBOARD_PATH: /var/lib/grafana/dashboards/master-overview.json
    volumes:
      - ./docker/grafana/provisioning:/etc/grafana/provisioning:ro
      - ./docker/grafana/dashboards:/var/lib/grafana/dashboards:ro
      - grafana_data:/var/lib/grafana
    ports:
      - "8000:8000"
    healthcheck:
      test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://127.0.0.1:8000/api/health"]
      interval: 300s
      timeout: 10s
      retries: 3
      start_period: 10s

  umami:
    # Pinned away from postgresql-latest on 2026-04-23. The rolling
    # tag jumped to Umami 3.1.0 (Next.js 16) and started crashing the
    # container on every POST /api/send — page loaders hung on the
    # failing tracker request. v2.18.0 is the last known-stable v2.
    # Rolling back to v2 was safe here because the schema is shared
    # across 2.x. If you bump to v3 again, verify the DB migration
    # path and test /api/send with a real POST before committing.
    image: ghcr.io/umami-software/umami:postgresql-v2.18.0
    container_name: mana-mon-umami
    restart: always
    mem_limit: 384m
    depends_on:
      postgres:
        condition: service_healthy
    environment:
      DATABASE_URL: postgresql://postgres:${POSTGRES_PASSWORD:-mana123}@postgres:5432/umami
      DATABASE_TYPE: postgresql
      APP_SECRET: ${UMAMI_APP_SECRET:-change-me-umami-secret}
      DISABLE_TELEMETRY: 1
    ports:
      - "8010:3000"
    healthcheck:
      test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://127.0.0.1:3000/api/heartbeat"]
      interval: 300s
      timeout: 10s
      retries: 3
      start_period: 30s

  # ============================================
  # Tier 8: Metrics & Exporters (Ports 9000-9199)
  # ============================================

  victoriametrics:
    image: victoriametrics/victoria-metrics:v1.99.0
    container_name: mana-mon-victoria
    restart: always
    mem_limit: 384m
    # Mount the host config dir read-only and point promscrape directly at it,
    # so edits to docker/prometheus/prometheus.yml are picked up by POST /-/reload
    # without a container restart. The previous setup baked a copy into
    # /etc/prometheus/ at startup, which silently drifted from the host file
    # whenever the container wasn't restarted (matrix removal incident, 2026-04-08).
    entrypoint: ["/victoria-metrics-prod", "-storageDataPath=/storage", "-retentionPeriod=2y", "-httpListenAddr=:9090", "-promscrape.config=/etc/prometheus/prometheus.yml", "-promscrape.config.strictParse=false", "-selfScrapeInterval=15s", "-search.latencyOffset=0s"]
    volumes:
      - ./docker/prometheus:/etc/prometheus:ro
      - victoriametrics_data:/storage
    ports:
      - "9090:9090"
    healthcheck:
      test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://127.0.0.1:9090/health"]
      interval: 300s
      timeout: 10s
      retries: 3
      start_period: 10s

  tempo:
    image: grafana/tempo:2.6.1
    container_name: mana-mon-tempo
    restart: always
    mem_limit: 256m
    command: ["-config.file=/etc/tempo/tempo.yaml"]
    volumes:
      - ./docker/tempo:/etc/tempo:ro
      - tempo_data:/var/tempo
    ports:
      - "4318:4318" # OTLP HTTP receiver
      - "3200:3200" # Tempo API (for Grafana)
    healthcheck:
      test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://127.0.0.1:3200/ready"]
      interval: 300s
      timeout: 10s
      retries: 3
      start_period: 10s

  loki:
    image: grafana/loki:3.0.0
    container_name: mana-mon-loki
    restart: always
    mem_limit: 192m
    entrypoint: ["sh", "-c", "mkdir -p /etc/loki && cp /mnt/loki-config/*.yaml /etc/loki/ 2>/dev/null; exec /usr/bin/loki -config.file=/etc/loki/local-config.yaml"]
    volumes:
      - ./docker/loki:/mnt/loki-config:ro
      - loki_data:/loki
    ports:
      - "3100:3100"
    healthcheck:
      test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://127.0.0.1:3100/ready"]
      interval: 300s
      timeout: 10s
      retries: 3
      start_period: 15s

  promtail:
    image: grafana/promtail:3.0.0
    container_name: mana-mon-promtail
    restart: always
    mem_limit: 96m
    command: -config.file=/etc/promtail/config.yaml -config.expand-env=true
    volumes:
      - ./docker/promtail:/etc/promtail:ro
      - /var/run/docker.sock:/var/run/docker.sock:ro
    depends_on:
      loki:
        condition: service_started
    healthcheck:
      test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://127.0.0.1:9080/ready"]
      interval: 300s
      timeout: 10s
      retries: 3
      start_period: 10s

  pushgateway:
    image: prom/pushgateway:v1.7.0
    container_name: mana-mon-pushgateway
    restart: always
    mem_limit: 48m
    ports:
      - "9091:9091"
    healthcheck:
      test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://127.0.0.1:9091/-/healthy"]
      interval: 300s
      timeout: 10s
      retries: 3
      start_period: 20s

  cadvisor:
    image: gcr.io/cadvisor/cadvisor:v0.49.1
    container_name: mana-mon-cadvisor

@@ -1541,112 +1334,6 @@ services:
        sleep 60
      done

  blackbox-exporter:
    image: prom/blackbox-exporter:v0.25.0
    container_name: mana-mon-blackbox
    restart: always
    mem_limit: 128m
    # Use Cloudflare + Google public resolvers instead of Docker's
    # embedded DNS (127.0.0.11). Docker DNS forwards to the host
    # resolver which forwards to the home router (FRITZ!Box), and the
    # router keeps a stale negative cache for hours after a hostname
    # first fails. New CNAMEs (e.g. fresh GPU public hostnames added
    # via the Cloudflare dashboard) appear as "no such host" to the
    # blackbox probes for the entire negative-cache TTL even though
    # they resolve fine via 1.1.1.1 directly.
    dns:
      - 1.1.1.1
      - 8.8.8.8
    command: ["--config.file=/etc/blackbox/blackbox.yml"]
    volumes:
      - ./docker/blackbox/blackbox.yml:/etc/blackbox/blackbox.yml:ro
    ports:
      - "9115:9115"
    healthcheck:
      test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://127.0.0.1:9115/"]
      interval: 300s
      timeout: 10s
      retries: 3
      start_period: 10s

  # ============================================
  # Alerting Stack (Ports 9093-9095)
  # ============================================

  vmalert:
    image: victoriametrics/vmalert:v1.99.0
    container_name: mana-mon-vmalert
    restart: always
    mem_limit: 64m
    depends_on:
      victoriametrics:
        condition: service_healthy
      alertmanager:
        condition: service_healthy
    # Same direct-mount pattern as victoriametrics above — see the comment
    # there for the rationale.
    entrypoint: ["/vmalert-prod", "-datasource.url=http://victoriametrics:9090", "-notifier.url=http://alertmanager:9093", "-remoteWrite.url=http://victoriametrics:9090", "-remoteRead.url=http://victoriametrics:9090", "-rule=/etc/alerts/alerts.yml", "-evaluationInterval=30s", "-httpListenAddr=:8880"]
    volumes:
      - ./docker/prometheus:/etc/alerts:ro
    ports:
      - "8880:8880"
    healthcheck:
      test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://127.0.0.1:8880/health"]
      interval: 300s
      timeout: 10s
      retries: 3
      start_period: 25s

  alertmanager:
    image: prom/alertmanager:v0.27.0
    container_name: mana-mon-alertmanager
    restart: always
    mem_limit: 64m
    depends_on:
      alert-notifier:
        condition: service_healthy
    command: ["--config.file=/etc/alertmanager/alertmanager.yml", "--storage.path=/alertmanager", "--web.listen-address=:9093"]
    volumes:
      - ./docker/alertmanager/alertmanager.yml:/etc/alertmanager/alertmanager.yml:ro
      - alertmanager_data:/alertmanager
    ports:
      - "9093:9093"
    healthcheck:
      test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://127.0.0.1:9093/-/healthy"]
      interval: 300s
      timeout: 10s
      retries: 3
      start_period: 25s

  alert-notifier:
    build:
      context: ./docker/alert-notifier
      dockerfile: Dockerfile
    image: alert-notifier:local
    container_name: mana-mon-alert-notifier
    restart: always
    # Tier-3 right-size 2026-04-28: live RSS ~25 MiB (79%) — at OOM
    # risk during alert-burst when many alerts queue at once. Bumped
    # to 48m.
    mem_limit: 48m
    environment:
      PORT: 8080
      TELEGRAM_BOT_TOKEN: ${TELEGRAM_BOT_TOKEN:-}
      TELEGRAM_CHAT_ID: ${TELEGRAM_CHAT_ID:-}
      NTFY_TOPIC: ${NTFY_TOPIC:-}
    ports:
      - "9095:8080"
    healthcheck:
      test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://127.0.0.1:8080/health"]
      interval: 300s
      timeout: 5s
      retries: 3
      start_period: 25s

  # ============================================
  # Auto-Update (Watchtower)
  # ============================================

  watchtower:
    image: nickfedor/watchtower:latest
    container_name: mana-auto-watchtower

@@ -1669,62 +1356,6 @@ services:
  # GlitchTip Error Tracking (Sentry-compatible)
  # ============================================

  glitchtip:
    image: glitchtip/glitchtip:latest
    container_name: mana-mon-glitchtip
    restart: always
    mem_limit: 384m
    environment:
      DATABASE_URL: postgres://postgres:${POSTGRES_PASSWORD:-mana123}@postgres:5432/glitchtip
      REDIS_URL: redis://:${REDIS_PASSWORD:-redis123}@redis:6379/1
      SECRET_KEY: ${GLITCHTIP_SECRET_KEY:-change-me-in-production}
      PORT: "8020"
      GLITCHTIP_DOMAIN: https://glitchtip.mana.how
      DEFAULT_FROM_EMAIL: glitchtip@mana.how
      CELERY_WORKER_AUTOSCALE: "1,3"
      ENABLE_USER_REGISTRATION: "true"
    ports:
      - "8020:8020"
    depends_on:
      postgres:
        condition: service_healthy
      redis:
        condition: service_healthy
    healthcheck:
      test: ["CMD", "python3", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8020/_health/')"]
      interval: 300s
      timeout: 10s
      retries: 3
      start_period: 30s

  glitchtip-worker:
    image: glitchtip/glitchtip:latest
    container_name: mana-mon-glitchtip-worker
    restart: always
    mem_limit: 192m
    command: ./bin/run-celery-with-beat.sh
    environment:
      DATABASE_URL: postgres://postgres:${POSTGRES_PASSWORD:-mana123}@postgres:5432/glitchtip
      REDIS_URL: redis://:${REDIS_PASSWORD:-redis123}@redis:6379/1
      SECRET_KEY: ${GLITCHTIP_SECRET_KEY:-change-me-in-production}
      GLITCHTIP_DOMAIN: https://glitchtip.mana.how
      CELERY_WORKER_AUTOSCALE: "1,3"
    depends_on:
      postgres:
        condition: service_healthy
      redis:
        condition: service_healthy

  # ============================================
  # Unified API Server
  # ============================================
  # apps/api — Hono/Bun process that hosts all 16 product compute
  # modules (calendar, todo, chat, picture, planta, food, news,
  # traces, moodlit, presi, music, contacts, storage, context, guides,
  # research, who) on a single port. Replaces ~17 per-product backend
  # containers from the pre-consolidation era; the unified Mana web
  # app's compute calls all flow through here.

  mana-api:
    build:
      context: .

@@ -1821,17 +1452,7 @@
volumes:
  redis_data:
    name: mana-redis-data
  victoriametrics_data:
    name: mana-victoria-data
  alertmanager_data:
    name: mana-alertmanager-data
  grafana_data:
    name: mana-grafana-data
  analytics_data:
    name: mana-analytics-data
  loki_data:
    name: mana-loki-data
  stalwart_data:
    name: mana-stalwart-data
  tempo_data:
    name: mana-tempo-data