From c84742005b5db0d0a57f3a55801f9ce7b079e13b Mon Sep 17 00:00:00 2001 From: Till JS Date: Thu, 7 May 2026 20:26:10 +0200 Subject: [PATCH] =?UTF-8?q?infra(phase=202g):=20mana-research=20=E2=86=92?= =?UTF-8?q?=20GPU-Box?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Web-Research-Orchestrator (16+ search-/LLM-providers) auf die GPU-Box verlagert. Cross-LAN für mana-auth/mana-credits/mana-llm/mana-search/ postgres/redis (192.168.178.131). research.mana.how routet jetzt zum mana-gpu-server-Tunnel (CF config v29). Mini-Container-Count 42 → 41. PUBLIC_MANA_RESEARCH_URL in mana-app-web auf https-URL umgestellt — Mini-Container können 192.168.178.11 nicht direkt erreichen (Colima-NAT), daher Cross-LAN-Bridge via Cloudflare-Tunnel wie bei mana-ai. Co-Authored-By: Claude Opus 4.7 (1M context) --- cloudflared-config.yml | 2 - docker-compose.macmini.yml | 66 ++--------------------- docker/prometheus/prometheus.yml | 1 + docs/PLAN_OPTION_C.md | 1 + infrastructure/.env.gpu-box.example | 19 +++++++ infrastructure/README.md | 5 ++ infrastructure/docker-compose.gpu-box.yml | 49 +++++++++++++++++ 7 files changed, 80 insertions(+), 63 deletions(-) diff --git a/cloudflared-config.yml b/cloudflared-config.yml index 1673f4724..e0340a173 100644 --- a/cloudflared-config.yml +++ b/cloudflared-config.yml @@ -148,8 +148,6 @@ ingress: service: http://localhost:3063 - hostname: events.mana.how service: http://localhost:3065 - - hostname: research.mana.how - service: http://localhost:3068 - hostname: cards-api.mana.how service: http://localhost:3072 - hostname: feedback.mana.how diff --git a/docker-compose.macmini.yml b/docker-compose.macmini.yml index 7d8bc7e6d..7406c346b 100644 --- a/docker-compose.macmini.yml +++ b/docker-compose.macmini.yml @@ -343,66 +343,10 @@ services: retries: 3 start_period: 15s - mana-research: - build: - context: . 
- dockerfile: services/mana-research/Dockerfile - image: mana-research:local - container_name: mana-research - restart: always - # Tier-3 right-size 2026-04-28: live RSS ~57 MiB, 4× headroom is enough. - mem_limit: 128m - depends_on: - postgres: - condition: service_healthy - redis: - condition: service_started - mana-credits: - condition: service_healthy - mana-search: - condition: service_started - environment: - TZ: Europe/Berlin - NODE_ENV: production - PORT: 3068 - DATABASE_URL: postgresql://postgres:${POSTGRES_PASSWORD:-mana123}@postgres:5432/mana_platform - # Redis requires auth (see redis service `--requirepass`). Without the - # password here the cache layer degrades to every-request-missing with - # NOAUTH log spam. Cache misses are not fatal (the executor just - # proxies straight to the upstream provider), but the log noise - # drowns out real errors in grafana/glitchtip. - REDIS_URL: redis://:${REDIS_PASSWORD:-redis123}@redis:6379 - MANA_AUTH_URL: http://mana-auth:3001 - MANA_LLM_URL: http://mana-llm:3025 - MANA_CREDITS_URL: http://mana-credits:3002 - MANA_SEARCH_URL: http://mana-search:3021 - MANA_SERVICE_KEY: ${MANA_SERVICE_KEY} - CACHE_TTL_SECONDS: 3600 - BRAVE_API_KEY: ${BRAVE_API_KEY:-} - TAVILY_API_KEY: ${TAVILY_API_KEY:-} - EXA_API_KEY: ${EXA_API_KEY:-} - SERPER_API_KEY: ${SERPER_API_KEY:-} - JINA_API_KEY: ${JINA_API_KEY:-} - FIRECRAWL_API_KEY: ${FIRECRAWL_API_KEY:-} - SCRAPINGBEE_API_KEY: ${SCRAPINGBEE_API_KEY:-} - PERPLEXITY_API_KEY: ${PERPLEXITY_API_KEY:-} - ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY:-} - OPENAI_API_KEY: ${OPENAI_API_KEY:-} - GOOGLE_GENAI_API_KEY: ${GOOGLE_GENAI_API_KEY:-} - CORS_ORIGINS: https://mana.how,https://chat.mana.how,https://research.mana.how - ports: - - "3068:3068" - healthcheck: - test: ["CMD", "bun", "-e", "fetch('http://127.0.0.1:3068/health').then(r=>process.exit(r.ok?0:1)).catch(()=>process.exit(1))"] - interval: 120s - timeout: 10s - retries: 3 - start_period: 15s - labels: - - "traefik.enable=true" - - 
"traefik.http.routers.mana-research.rule=Host(`research.mana.how`)" - - "traefik.http.routers.mana-research.tls=true" - - "traefik.http.services.mana-research.loadbalancer.server.port=3068" + # mana-research moved to GPU-Box on 2026-05-07 (Phase 2g). Source-of-truth + # is now mana-monorepo/infrastructure/docker-compose.gpu-box.yml. DNS for + # research.mana.how points at the mana-gpu-server tunnel; cross-LAN to + # mana-credits + mana-search + postgres + redis on 192.168.178.131. mana-events: build: @@ -930,7 +874,7 @@ services: # internal docker-network URL. Without this pair, the SSR- # injected window.__PUBLIC_MANA_RESEARCH_URL__ is empty string # and research fetches fall back to the current origin (404). - PUBLIC_MANA_RESEARCH_URL: http://mana-research:3068 + PUBLIC_MANA_RESEARCH_URL: https://research.mana.how PUBLIC_MANA_RESEARCH_URL_CLIENT: https://research.mana.how # mana-analytics — public-feedback hub. Browser hits the # /api/v1/(public/)feedback/* endpoints directly; SSR uses the diff --git a/docker/prometheus/prometheus.yml b/docker/prometheus/prometheus.yml index 2325eb3e7..88f43842f 100644 --- a/docker/prometheus/prometheus.yml +++ b/docker/prometheus/prometheus.yml @@ -300,6 +300,7 @@ scrape_configs: - https://memoro-api.mana.how/health - https://memoro-audio.mana.how/health - https://mana-ai.mana.how/health + - https://research.mana.how/health # who.mana.how API on /api/decks — root is 404 by design (Phaser-Cantina mounts at /cantina) - https://who-api.mana.how/api/decks # Verein backoffice (mana e.V. Plattform); only /health returns 200, root is auth-walled diff --git a/docs/PLAN_OPTION_C.md b/docs/PLAN_OPTION_C.md index 88e967ded..aa26d32e8 100644 --- a/docs/PLAN_OPTION_C.md +++ b/docs/PLAN_OPTION_C.md @@ -18,6 +18,7 @@ Production-Hot-Path bleibt unverändert auf dem Mini. | Phase 2d — Glitchtip mit dediziertem DB-Stack | ✅ | 4 Container neu (mana-mon-glitchtip + worker + dedizierte glitchtip-postgres + glitchtip-redis). 
Mini-Postgres scheiterte bei `logs.0001_initial`-Partition-Creation mit OS-level "Permission denied" (macOS-Docker-Storage-Quirk auf externer SSD). Auf der GPU-Box mit Linux-ext4 saubere 333-Tabellen-Migration. Worker enqueuet UND finished Tasks → DB-Writes funktional (vorher hingen sie ewig). Public-Hostname `glitchtip.mana.how` → mana-gpu-server-Tunnel (config v23). | | Phase 2e — Status-Page auf GPU-Box | ✅ | 2 Container neu (`mana-mon-status-gen` + `mana-mon-status-nginx`). Sparse `/srv/mana/source` mit `mana-source-pull.timer` (stündlich) hostet das `generate-status-page.sh` und `mana-apps.ts`. status-gen schreibt in das Docker-Volume `status-output`, das status-nginx auf `:8090` ausliefert. Public-Hostname `status.mana.how` → mana-gpu-server-Tunnel (config v25). Bonus: behebt den Inode-Stale-Bind-Mount-Bug, der auf dem Mini bei jedem CD-`git checkout -f` die Status-Page kaputt machte. `vm.mana.how` (Phase-2c-Workaround für Mini→GPU-VM-Routing) wurde wieder aus dem Tunnel entfernt — VM ist nicht mehr public. | | Phase 2f — drei weitere Hilfsdienste verlagert | ✅ | (1) **verdaccio** (npm.mana.how, was im mana-platform-Repo): Volume tar-stream + Config-bundle in mana-monorepo (`infrastructure/verdaccio/config.yaml`). (2) **news-ingester** (Bun-Background-Tick): Cross-LAN-DB zur Mini-Postgres. Cross-arch-Limit aufgedeckt — `docker save\|load` zwischen Mini (arm64) und GPU-Box (x86_64) wirft `exec format error`, daher nativer Build mit GPU-Box-eigenem Dockerfile in `infrastructure/news-ingester/` der `@mana/shared-rss` als `file:`-ref vendored. (3) **mana-ai** (AI Mission Runner): Cross-LAN für mana-api/mana-llm/mana-research, RSA-Key-Sync (`MANA_AI_PRIVATE_KEY_PEM`), `mana-ai.mana.how` zum GPU-Tunnel (config v28). Bonus: AI Mission Runner sitzt jetzt im selben docker-network wie gpu-llm/gpu-ollama — künftige direct-LLM-Pfade ohne Cloudflare-Round-Trip. Mini Container 44 → 42. 
| +| Phase 2g — mana-research auslagern | ✅ | Web-Research-Orchestrator mit 16+ Search-/LLM-Providern. Nativer Build via workspace-Dockerfile (sparse-checkout `services/mana-research` + `packages/{shared-research,shared-types,shared-hono,shared-logger}`). Cross-LAN zu mana-auth/mana-credits/mana-llm/mana-search/postgres/redis (alle auf 192.168.178.131); Redis-Auth via `REDIS_PASSWORD` aus Mini's `.env.macmini` übernommen. `research.mana.how` zum GPU-Tunnel umgebogen via Cloudflare-API (config v29). `PUBLIC_MANA_RESEARCH_URL` in mana-app-web von `http://mana-research:3068` auf `https://research.mana.how` umgestellt (`PUBLIC_MANA_RESEARCH_URL_CLIENT` zeigte bereits dorthin) — gleiches Cross-LAN-Bridge-Pattern wie mana-ai (Mini-Container können 192.168.178.11 nicht direkt erreichen, daher Tunnel-Roundtrip). Mini Container 42 → 41. | | Phase 3 — Daten-Migration | n/a | Alle migrierten Apps lesen Mini-Postgres direkt — keine separate Datenmigration | | Phase 4 — Cloudflare-Cutover | ✅ | API-Approach via `cert.pem` apiToken: PUT `/accounts/.../cfd_tunnel/.../configurations` für GPU-Tunnel, dann `cloudflared tunnel route dns --overwrite-dns`. Kein Dashboard-Klick nötig. 3 Hostnames live (grafana/git/stats) | | Phase 5 — Mini-Compose aufräumen | ✅ | 3 Blöcke in `cloudflared-config.yml` auskommentiert (Backup angelegt), cloudflared neu geladen, Mini-Container `mana-mon-grafana` + `mana-mon-umami` gestoppt (nicht entfernt — Rollback bleibt möglich) | diff --git a/infrastructure/.env.gpu-box.example b/infrastructure/.env.gpu-box.example index f4f21d35d..e27d01ead 100644 --- a/infrastructure/.env.gpu-box.example +++ b/infrastructure/.env.gpu-box.example @@ -34,3 +34,22 @@ GLITCHTIP_ADMIN_PASSWORD= # in mana-auth's MANA_AI_PUBLIC_KEY_PEM auf dem Mini steht). MANA_SERVICE_KEY= MANA_AI_PRIVATE_KEY_PEM= + +# ─── Redis-Mini Auth (für Cross-LAN-Cache von mana-research, Phase 2g) ─── +# Identisch mit mana-monorepo/.env.macmini REDIS_PASSWORD. 
+REDIS_PASSWORD= + +# ─── mana-research (Web-Research-Orchestrator, Phase 2g) ───── +# 16 Search-/LLM-Provider — alle optional, leer lassen wenn ungenutzt. +# Aktuell aktiv im Live-Betrieb: GOOGLE_GENAI_API_KEY (von Mini übernommen). +GOOGLE_GENAI_API_KEY= +ANTHROPIC_API_KEY= +OPENAI_API_KEY= +PERPLEXITY_API_KEY= +EXA_API_KEY= +TAVILY_API_KEY= +BRAVE_API_KEY= +SERPER_API_KEY= +JINA_API_KEY= +FIRECRAWL_API_KEY= +SCRAPINGBEE_API_KEY= diff --git a/infrastructure/README.md b/infrastructure/README.md index e24fb3a98..bf39da9f2 100644 --- a/infrastructure/README.md +++ b/infrastructure/README.md @@ -21,6 +21,10 @@ Hilfsdienste vom Mini abgegeben — siehe [`docs/PLAN_OPTION_C.md`](../docs/PLAN | `gpu-node-exporter`, `gpu-cadvisor`, `gpu-promtail` | (intern) | Self-Monitoring (Phase 2c) | | `glitchtip` + worker + dedizierte postgres + redis | `:8020` → `glitchtip.mana.how` | Error-Tracking mit eigenem DB-Stack (Phase 2d) | | `status-page-gen`, `status-nginx` | `:8090` → `status.mana.how` | Status-Seite (Phase 2e) | +| `verdaccio` | `:4873` → `npm.mana.how` | Private @mana/* npm-Registry (Phase 2f-1) | +| `news-ingester` | (intern) | RSS-Crawl + News-Ingestion (Phase 2f-2) | +| `mana-ai` | `:3067` → `mana-ai.mana.how` | AI Mission Runner (Phase 2f-3) | +| `mana-research` | `:3068` → `research.mana.how` | Web-Research-Orchestrator (Phase 2g) | Plus der bestehende `photon`-Container (Geocoder), der vor Phase 2 schon auf der Box existierte und unangetastet blieb. 
@@ -81,6 +85,7 @@ Aktive Public-Hostnames (Stand 2026-05-07, config v28): | `photon.mana.how` | `:2322` | Photon Geocoder (cross-LAN-Workaround für mana-geocoding's Probe + privacy-local Provider) | | `npm.mana.how` | `:4873` | Verdaccio @mana/* npm-Registry (Phase 2f-1) | | `mana-ai.mana.how` | `:3067` | AI Mission Runner (Phase 2f-3) | +| `research.mana.how` | `:3068` | Web-Research-Orchestrator (Phase 2g) | API-Update (idempotent): diff --git a/infrastructure/docker-compose.gpu-box.yml b/infrastructure/docker-compose.gpu-box.yml index 12e53d10b..35d3d167c 100644 --- a/infrastructure/docker-compose.gpu-box.yml +++ b/infrastructure/docker-compose.gpu-box.yml @@ -523,6 +523,55 @@ services: retries: 3 start_period: 30s + # ============================================ + # Phase 2g — mana-research (Web Research Orchestrator, 2026-05-07) + # 16+ search-providers + LLM-Pipeline. User-facing aber latenztolerant + # (5-30s queries). Schreibt in mana_platform.research-Tabellen, nutzt + # Redis-Cache + ruft mana-credits + mana-search auf. Geographische Nähe + # zu gpu-llm/gpu-ollama wie bei mana-ai. 
+  # ============================================
+  mana-research:
+    build:
+      context: /srv/mana/source
+      dockerfile: services/mana-research/Dockerfile
+    image: mana-research:gpu-box
+    container_name: mana-research
+    restart: unless-stopped
+    environment:
+      TZ: Europe/Berlin
+      NODE_ENV: production
+      PORT: '3068'
+      DATABASE_URL: postgresql://postgres:${POSTGRES_PASSWORD}@192.168.178.131:5432/mana_platform
+      REDIS_URL: redis://:${REDIS_PASSWORD}@192.168.178.131:6379
+      CACHE_TTL_SECONDS: '3600'
+      CORS_ORIGINS: https://mana.how,https://chat.mana.how,https://research.mana.how
+      # Cross-LAN service deps: all reached on the Mini (192.168.178.131), so each port must be host-published there
+      MANA_AUTH_URL: http://192.168.178.131:3001
+      MANA_CREDITS_URL: http://192.168.178.131:3002
+      MANA_LLM_URL: http://192.168.178.131:3025
+      MANA_SEARCH_URL: http://192.168.178.131:3021  # was 3012; Mini compose used mana-search:3021 — NOTE(review): confirm the published host port
+      MANA_SERVICE_KEY: ${MANA_SERVICE_KEY}
+      # LLM + Search provider keys (most empty, only Google active currently)
+      ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY:-}
+      OPENAI_API_KEY: ${OPENAI_API_KEY:-}
+      GOOGLE_GENAI_API_KEY: ${GOOGLE_GENAI_API_KEY:-}
+      PERPLEXITY_API_KEY: ${PERPLEXITY_API_KEY:-}
+      EXA_API_KEY: ${EXA_API_KEY:-}
+      TAVILY_API_KEY: ${TAVILY_API_KEY:-}
+      BRAVE_API_KEY: ${BRAVE_API_KEY:-}
+      SERPER_API_KEY: ${SERPER_API_KEY:-}
+      JINA_API_KEY: ${JINA_API_KEY:-}
+      FIRECRAWL_API_KEY: ${FIRECRAWL_API_KEY:-}
+      SCRAPINGBEE_API_KEY: ${SCRAPINGBEE_API_KEY:-}
+    ports:
+      - '3068:3068'
+    healthcheck:
+      test: ['CMD', 'bun', '-e', "fetch('http://127.0.0.1:3068/health').then(r => process.exit(r.ok ? 0 : 1)).catch(() => process.exit(1))"]  # 127.0.0.1 as on the Mini; localhost may resolve to ::1
+      interval: 60s
+      timeout: 10s
+      retries: 3
+      start_period: 30s
+
   # ============================================
   # Phase 2f-3 — mana-ai (AI Mission Runner, 2026-05-07)
   # Background tick-loop (60s default), queryt mana-api + mana-llm +