chore(infra): drop migrated services from Mini compose + tunnel config

Phase 2c+2d cleanup. The 14 services that moved to the GPU-Box stack
(grafana, victoriametrics, loki, tempo, promtail, alertmanager,
vmalert, pushgateway, blackbox-exporter, alert-notifier, umami,
glitchtip + worker, forgejo) are now stopped on the Mini and stable
on the GPU box, so the rollback insurance can come out:

- docker-compose.macmini.yml: drop 14 service blocks (-369 lines) +
  the now-orphan named volumes (victoriametrics_data, loki_data,
  alertmanager_data, grafana_data, tempo_data).
- cloudflared-config.yml: drop the four hostnames whose DNS already
  points at the mana-gpu-server tunnel. Mini-tunnel ingress for them
  has been dead routing since 2026-05-06, so removing the rules just
  makes the file match reality. The hostnames now live in the GPU
  tunnel's dashboard config (token-managed); a quick reachability
  check is sketched below.
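
A quick external sanity check of the cutover (illustrative only; per the
note above, these hostnames should now be answered via the GPU tunnel, so
they should keep responding after the Mini ingress rules are gone):

    # Each migrated hostname should still respond over HTTPS once the
    # Mini-tunnel ingress rules are removed.
    for h in git.mana.how grafana.mana.how stats.mana.how glitchtip.mana.how; do
      printf '%s: ' "$h"
      curl -s -o /dev/null -w '%{http_code}\n' "https://$h/"
    done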

Containers + volumes stay on the Mini for now. Running
`docker compose -f docker-compose.macmini.yml --env-file .env.macmini up -d --remove-orphans`
on the box drops the orphaned containers in one go when ready; the named
volumes stay behind until they are removed explicitly (see below).
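
One possible Mini-side teardown sequence (a sketch only; the volume names
come from the `volumes:` block removed below, and the data should be
confirmed as migrated to the GPU box before anything is deleted):

    # Drop the now-orphaned containers for the 14 migrated services.
    docker compose -f docker-compose.macmini.yml --env-file .env.macmini up -d --remove-orphans

    # Named volumes survive --remove-orphans; remove them separately once
    # the migrated data on the GPU box is confirmed good.
    docker volume rm \
      mana-victoria-data mana-alertmanager-data mana-grafana-data \
      mana-loki-data mana-tempo-data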

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
Author: Till JS
Date:   2026-05-07 02:39:43 +02:00
Parent: f422fd6779
Commit: 0db64cb47b
2 changed files with 0 additions and 387 deletions

cloudflared-config.yml

@@ -132,8 +132,6 @@ ingress:
# ============================================
# Forgejo (Git + CI/CD)
# ============================================
- hostname: git.mana.how
service: http://localhost:3041
# ============================================
# Standalone microservices
@@ -215,12 +213,6 @@ ingress:
# ============================================
# Monitoring & observability
# ============================================
- hostname: grafana.mana.how
service: http://localhost:8000
- hostname: stats.mana.how
service: http://localhost:8010
- hostname: glitchtip.mana.how
service: http://localhost:8020
# ============================================
# GPU services (NOT in this tunnel)

docker-compose.macmini.yml

@@ -202,56 +202,6 @@ services:
# Tier 0b: Forgejo (Git + CI/CD + Registry)
# ============================================
forgejo:
image: codeberg.org/forgejo/forgejo:11
container_name: mana-core-forgejo
restart: always
mem_limit: 512m
depends_on:
postgres:
condition: service_healthy
environment:
USER_UID: 1000
USER_GID: 1000
FORGEJO__database__DB_TYPE: postgres
FORGEJO__database__HOST: postgres:5432
FORGEJO__database__NAME: forgejo
FORGEJO__database__USER: postgres
FORGEJO__database__PASSWD: ${POSTGRES_PASSWORD:-mana123}
FORGEJO__server__DOMAIN: git.mana.how
FORGEJO__server__SSH_DOMAIN: git.mana.how
FORGEJO__server__ROOT_URL: https://git.mana.how/
FORGEJO__server__HTTP_PORT: 3000
FORGEJO__server__SSH_PORT: 2222
FORGEJO__server__LFS_START_SERVER: "true"
FORGEJO__service__DISABLE_REGISTRATION: "true"
FORGEJO__service__REQUIRE_SIGNIN_VIEW: "false"
FORGEJO__actions__ENABLED: "true"
FORGEJO__actions__DEFAULT_ACTIONS_URL: https://code.forgejo.org
FORGEJO__packages__ENABLED: "true"
FORGEJO__ui__DEFAULT_THEME: forgejo-dark
FORGEJO__ui__SHOW_USER_EMAIL: "false"
FORGEJO__mailer__ENABLED: "false"
volumes:
- /Volumes/ManaData/forgejo:/data
ports:
- "3041:3000"
- "2222:22"
healthcheck:
test: ["CMD", "wget", "-q", "--spider", "http://localhost:3000/api/v1/version"]
interval: 120s
timeout: 10s
retries: 3
start_period: 30s
# Forgejo runner removed — no macOS binary exists, Docker-based runner
# can't access host filesystem/SSH for CD. GitHub CD handles deployment
# via native self-hosted runner. Forgejo is kept as a mirror only.
# ============================================
# Tier 1: Core Auth Service (Port 3001)
# ============================================
mana-auth:
build:
context: .
@@ -1281,163 +1231,6 @@ services:
# Tier 7: Monitoring Dashboards (Ports 8000-8099)
# ============================================
grafana:
image: grafana/grafana:10.4.1
container_name: mana-mon-grafana
restart: always
mem_limit: 192m
depends_on:
victoriametrics:
condition: service_healthy
environment:
GF_SECURITY_ADMIN_USER: admin
GF_SECURITY_ADMIN_PASSWORD: ${GRAFANA_PASSWORD:-admin}
GF_USERS_ALLOW_SIGN_UP: false
GF_AUTH_ANONYMOUS_ENABLED: true
GF_AUTH_ANONYMOUS_ORG_ROLE: Viewer
GF_SERVER_ROOT_URL: https://grafana.mana.how
GF_SERVER_HTTP_PORT: 8000
GF_INSTALL_PLUGINS: yesoreyeram-infinity-datasource
GF_DASHBOARDS_DEFAULT_HOME_DASHBOARD_PATH: /var/lib/grafana/dashboards/master-overview.json
volumes:
- ./docker/grafana/provisioning:/etc/grafana/provisioning:ro
- ./docker/grafana/dashboards:/var/lib/grafana/dashboards:ro
- grafana_data:/var/lib/grafana
ports:
- "8000:8000"
healthcheck:
test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://127.0.0.1:8000/api/health"]
interval: 300s
timeout: 10s
retries: 3
start_period: 10s
umami:
# Pinned away from postgresql-latest on 2026-04-23. The rolling
# tag jumped to Umami 3.1.0 (Next.js 16) and started crashing the
# container on every POST /api/send — page loaders hung on the
# failing tracker request. v2.18.0 is the last known-stable v2.
# Rolling back to v2 was safe here because the schema is shared
# across 2.x. If you bump to v3 again, verify the DB migration
# path and test /api/send with a real POST before committing.
image: ghcr.io/umami-software/umami:postgresql-v2.18.0
container_name: mana-mon-umami
restart: always
mem_limit: 384m
depends_on:
postgres:
condition: service_healthy
environment:
DATABASE_URL: postgresql://postgres:${POSTGRES_PASSWORD:-mana123}@postgres:5432/umami
DATABASE_TYPE: postgresql
APP_SECRET: ${UMAMI_APP_SECRET:-change-me-umami-secret}
DISABLE_TELEMETRY: 1
ports:
- "8010:3000"
healthcheck:
test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://127.0.0.1:3000/api/heartbeat"]
interval: 300s
timeout: 10s
retries: 3
start_period: 30s
# ============================================
# Tier 8: Metrics & Exporters (Ports 9000-9199)
# ============================================
victoriametrics:
image: victoriametrics/victoria-metrics:v1.99.0
container_name: mana-mon-victoria
restart: always
mem_limit: 384m
# Mount the host config dir read-only and point promscrape directly at it,
# so edits to docker/prometheus/prometheus.yml are picked up by POST /-/reload
# without a container restart. The previous setup baked a copy into
# /etc/prometheus/ at startup, which silently drifted from the host file
# whenever the container wasn't restarted (matrix removal incident, 2026-04-08).
entrypoint: ["/victoria-metrics-prod", "-storageDataPath=/storage", "-retentionPeriod=2y", "-httpListenAddr=:9090", "-promscrape.config=/etc/prometheus/prometheus.yml", "-promscrape.config.strictParse=false", "-selfScrapeInterval=15s", "-search.latencyOffset=0s"]
volumes:
- ./docker/prometheus:/etc/prometheus:ro
- victoriametrics_data:/storage
ports:
- "9090:9090"
healthcheck:
test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://127.0.0.1:9090/health"]
interval: 300s
timeout: 10s
retries: 3
start_period: 10s
tempo:
image: grafana/tempo:2.6.1
container_name: mana-mon-tempo
restart: always
mem_limit: 256m
command: ["-config.file=/etc/tempo/tempo.yaml"]
volumes:
- ./docker/tempo:/etc/tempo:ro
- tempo_data:/var/tempo
ports:
- "4318:4318" # OTLP HTTP receiver
- "3200:3200" # Tempo API (for Grafana)
healthcheck:
test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://127.0.0.1:3200/ready"]
interval: 300s
timeout: 10s
retries: 3
start_period: 10s
loki:
image: grafana/loki:3.0.0
container_name: mana-mon-loki
restart: always
mem_limit: 192m
entrypoint: ["sh", "-c", "mkdir -p /etc/loki && cp /mnt/loki-config/*.yaml /etc/loki/ 2>/dev/null; exec /usr/bin/loki -config.file=/etc/loki/local-config.yaml"]
volumes:
- ./docker/loki:/mnt/loki-config:ro
- loki_data:/loki
ports:
- "3100:3100"
healthcheck:
test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://127.0.0.1:3100/ready"]
interval: 300s
timeout: 10s
retries: 3
start_period: 15s
promtail:
image: grafana/promtail:3.0.0
container_name: mana-mon-promtail
restart: always
mem_limit: 96m
command: -config.file=/etc/promtail/config.yaml -config.expand-env=true
volumes:
- ./docker/promtail:/etc/promtail:ro
- /var/run/docker.sock:/var/run/docker.sock:ro
depends_on:
loki:
condition: service_started
healthcheck:
test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://127.0.0.1:9080/ready"]
interval: 300s
timeout: 10s
retries: 3
start_period: 10s
pushgateway:
image: prom/pushgateway:v1.7.0
container_name: mana-mon-pushgateway
restart: always
mem_limit: 48m
ports:
- "9091:9091"
healthcheck:
test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://127.0.0.1:9091/-/healthy"]
interval: 300s
timeout: 10s
retries: 3
start_period: 20s
cadvisor:
image: gcr.io/cadvisor/cadvisor:v0.49.1
container_name: mana-mon-cadvisor
@@ -1541,112 +1334,6 @@ services:
sleep 60
done
blackbox-exporter:
image: prom/blackbox-exporter:v0.25.0
container_name: mana-mon-blackbox
restart: always
mem_limit: 128m
# Use Cloudflare + Google public resolvers instead of Docker's
# embedded DNS (127.0.0.11). Docker DNS forwards to the host
# resolver which forwards to the home router (FRITZ!Box), and the
# router keeps a stale negative cache for hours after a hostname
# first fails. New CNAMEs (e.g. fresh GPU public hostnames added
# via the Cloudflare dashboard) appear as "no such host" to the
# blackbox probes for the entire negative-cache TTL even though
# they resolve fine via 1.1.1.1 directly.
dns:
- 1.1.1.1
- 8.8.8.8
command: ["--config.file=/etc/blackbox/blackbox.yml"]
volumes:
- ./docker/blackbox/blackbox.yml:/etc/blackbox/blackbox.yml:ro
ports:
- "9115:9115"
healthcheck:
test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://127.0.0.1:9115/"]
interval: 300s
timeout: 10s
retries: 3
start_period: 10s
# ============================================
# Alerting Stack (Ports 9093-9095)
# ============================================
vmalert:
image: victoriametrics/vmalert:v1.99.0
container_name: mana-mon-vmalert
restart: always
mem_limit: 64m
depends_on:
victoriametrics:
condition: service_healthy
alertmanager:
condition: service_healthy
# Same direct-mount pattern as victoriametrics above — see the comment
# there for the rationale.
entrypoint: ["/vmalert-prod", "-datasource.url=http://victoriametrics:9090", "-notifier.url=http://alertmanager:9093", "-remoteWrite.url=http://victoriametrics:9090", "-remoteRead.url=http://victoriametrics:9090", "-rule=/etc/alerts/alerts.yml", "-evaluationInterval=30s", "-httpListenAddr=:8880"]
volumes:
- ./docker/prometheus:/etc/alerts:ro
ports:
- "8880:8880"
healthcheck:
test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://127.0.0.1:8880/health"]
interval: 300s
timeout: 10s
retries: 3
start_period: 25s
alertmanager:
image: prom/alertmanager:v0.27.0
container_name: mana-mon-alertmanager
restart: always
mem_limit: 64m
depends_on:
alert-notifier:
condition: service_healthy
command: ["--config.file=/etc/alertmanager/alertmanager.yml", "--storage.path=/alertmanager", "--web.listen-address=:9093"]
volumes:
- ./docker/alertmanager/alertmanager.yml:/etc/alertmanager/alertmanager.yml:ro
- alertmanager_data:/alertmanager
ports:
- "9093:9093"
healthcheck:
test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://127.0.0.1:9093/-/healthy"]
interval: 300s
timeout: 10s
retries: 3
start_period: 25s
alert-notifier:
build:
context: ./docker/alert-notifier
dockerfile: Dockerfile
image: alert-notifier:local
container_name: mana-mon-alert-notifier
restart: always
# Tier-3 right-size 2026-04-28: live RSS ~25 MiB (79%) — at OOM
# risk during alert-burst when many alerts queue at once. Bumped
# to 48m.
mem_limit: 48m
environment:
PORT: 8080
TELEGRAM_BOT_TOKEN: ${TELEGRAM_BOT_TOKEN:-}
TELEGRAM_CHAT_ID: ${TELEGRAM_CHAT_ID:-}
NTFY_TOPIC: ${NTFY_TOPIC:-}
ports:
- "9095:8080"
healthcheck:
test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://127.0.0.1:8080/health"]
interval: 300s
timeout: 5s
retries: 3
start_period: 25s
# ============================================
# Auto-Update (Watchtower)
# ============================================
watchtower:
image: nickfedor/watchtower:latest
container_name: mana-auto-watchtower
@@ -1669,62 +1356,6 @@ services:
# GlitchTip Error Tracking (Sentry-compatible)
# ============================================
glitchtip:
image: glitchtip/glitchtip:latest
container_name: mana-mon-glitchtip
restart: always
mem_limit: 384m
environment:
DATABASE_URL: postgres://postgres:${POSTGRES_PASSWORD:-mana123}@postgres:5432/glitchtip
REDIS_URL: redis://:${REDIS_PASSWORD:-redis123}@redis:6379/1
SECRET_KEY: ${GLITCHTIP_SECRET_KEY:-change-me-in-production}
PORT: "8020"
GLITCHTIP_DOMAIN: https://glitchtip.mana.how
DEFAULT_FROM_EMAIL: glitchtip@mana.how
CELERY_WORKER_AUTOSCALE: "1,3"
ENABLE_USER_REGISTRATION: "true"
ports:
- "8020:8020"
depends_on:
postgres:
condition: service_healthy
redis:
condition: service_healthy
healthcheck:
test: ["CMD", "python3", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8020/_health/')"]
interval: 300s
timeout: 10s
retries: 3
start_period: 30s
glitchtip-worker:
image: glitchtip/glitchtip:latest
container_name: mana-mon-glitchtip-worker
restart: always
mem_limit: 192m
command: ./bin/run-celery-with-beat.sh
environment:
DATABASE_URL: postgres://postgres:${POSTGRES_PASSWORD:-mana123}@postgres:5432/glitchtip
REDIS_URL: redis://:${REDIS_PASSWORD:-redis123}@redis:6379/1
SECRET_KEY: ${GLITCHTIP_SECRET_KEY:-change-me-in-production}
GLITCHTIP_DOMAIN: https://glitchtip.mana.how
CELERY_WORKER_AUTOSCALE: "1,3"
depends_on:
postgres:
condition: service_healthy
redis:
condition: service_healthy
# ============================================
# Unified API Server
# ============================================
# apps/api — Hono/Bun process that hosts all 17 product compute
# modules (calendar, todo, chat, picture, planta, food, news,
# traces, moodlit, presi, music, contacts, storage, context, guides,
# research, who) on a single port. Replaces ~17 per-product backend
# containers from the pre-consolidation era; the unified Mana web
# app's compute calls all flow through here.
mana-api:
build:
context: .
@@ -1821,17 +1452,7 @@ services:
volumes:
redis_data:
name: mana-redis-data
victoriametrics_data:
name: mana-victoria-data
alertmanager_data:
name: mana-alertmanager-data
grafana_data:
name: mana-grafana-data
analytics_data:
name: mana-analytics-data
loki_data:
name: mana-loki-data
stalwart_data:
name: mana-stalwart-data
tempo_data:
name: mana-tempo-data