From 8a5fad34dfd9835f62882114db1854816e983010 Mon Sep 17 00:00:00 2001 From: Till JS Date: Tue, 28 Apr 2026 22:19:21 +0200 Subject: [PATCH] fix(geocoding): bump PROVIDER_TIMEOUT_MS to 20s for cold cross-LAN MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cold-start fetches from the mana-geocoding container to photon-self on mana-gpu (over WSL2 mirrored networking) consistently take >10s on the first probe and ~2s once warm. The previous 8s default caused the chain to false-mark photon-self unhealthy on every cold path, leaking to public photon for the next 30s health-cache window — and pinning the public-photon answer in the 7d cache (now shortened to 1h). Also wires the docker-compose macmini env to honor PROVIDER_TIMEOUT_MS and CACHE_PUBLIC_TTL_MS overrides so production picks up the new values without a code rebuild. Co-Authored-By: Claude Opus 4.7 (1M context) --- docker-compose.macmini.yml | 8 ++++++++ services/mana-geocoding/CLAUDE.md | 5 ++++- services/mana-geocoding/src/config.ts | 11 ++++++----- 3 files changed, 18 insertions(+), 6 deletions(-) diff --git a/docker-compose.macmini.yml b/docker-compose.macmini.yml index 324f874e8..7d99a6836 100644 --- a/docker-compose.macmini.yml +++ b/docker-compose.macmini.yml @@ -510,6 +510,14 @@ services: # as `photon-self` provider with privacy: 'local' — eligible for # sensitive queries. Empty value = slot disabled. PHOTON_SELF_API_URL: ${PHOTON_SELF_API_URL:-} + # Cold-start cross-LAN fetches to photon-self consistently take + # >10s on the first probe; the 8s default false-marked it unhealthy + # on every cold path. 20s leaves headroom while still cutting off + # actually-stuck connections. + PROVIDER_TIMEOUT_MS: ${PROVIDER_TIMEOUT_MS:-20000} + # Short public-API cache TTL so a transient photon-self blip can't + # pin stale public-fallback answers in the LRU for days. 
+ CACHE_PUBLIC_TTL_MS: ${CACHE_PUBLIC_TTL_MS:-3600000} CORS_ORIGINS: https://mana.how,http://localhost:5173 CACHE_MAX_ENTRIES: "5000" CACHE_TTL_MS: "86400000" diff --git a/services/mana-geocoding/CLAUDE.md b/services/mana-geocoding/CLAUDE.md index 342d999a2..358752c8e 100644 --- a/services/mana-geocoding/CLAUDE.md +++ b/services/mana-geocoding/CLAUDE.md @@ -140,7 +140,10 @@ PORT=3018 # Default order: photon-self,photon,nominatim # `photon-self` is silently dropped if PHOTON_SELF_API_URL is unset. GEOCODING_PROVIDERS=photon-self,photon,nominatim -PROVIDER_TIMEOUT_MS=8000 # per-provider request timeout (cold-start safe) +PROVIDER_TIMEOUT_MS=20000 # per-provider request timeout. Cold-start + # cross-LAN fetches to photon-self take + # >10s on the first probe; tighter values + # false-mark it unhealthy on every cold path. PROVIDER_HEALTH_CACHE_MS=30000 # health-cache TTL — skip dead providers # --- Self-hosted Photon (privacy: 'local', PRIMARY since 2026-04-28) -- diff --git a/services/mana-geocoding/src/config.ts b/services/mana-geocoding/src/config.ts index ce4d9b0f1..fdac4220b 100644 --- a/services/mana-geocoding/src/config.ts +++ b/services/mana-geocoding/src/config.ts @@ -93,11 +93,12 @@ export function loadConfig(): Config { 'nominatim', ]), healthCacheMs: parseInt(process.env.PROVIDER_HEALTH_CACHE_MS || '30000', 10), - // 8 s default. Nominatim's cold-start DNS+TLS handshake can push the - // first health probe past the older 5 s default, false-marking the - // provider unhealthy for the next 30 s. 8 s survives a slow first - // probe but still cuts off actually-stuck connections. - timeoutMs: parseInt(process.env.PROVIDER_TIMEOUT_MS || '8000', 10), + // 20 s default. Cold-start cross-LAN fetches to photon-self + // (mana-gpu over WSL2 mirrored networking) consistently take + // >10 s on the first probe and ~2 s once warm. 
Tighter timeouts + // false-marked photon-self unhealthy on every cold path, leaking queries + // to public photon for the duration of the 30 s health cache. + timeoutMs: parseInt(process.env.PROVIDER_TIMEOUT_MS || '20000', 10), }, }; }