fix(geocoding): bump PROVIDER_TIMEOUT_MS to 20s for cold cross-LAN fetches

Cold-start fetches from the mana-geocoding container to photon-self
on mana-gpu (over WSL2 mirrored networking) consistently take >10s on
the first probe and ~2s once warm. The previous 8s default therefore
caused the provider chain to falsely mark photon-self unhealthy on every
cold path. Requests then leaked to public photon for the next 30s
health-cache window, and the public-photon answer stayed pinned in the
public-API cache (TTL previously 7d, now shortened to 1h).

Also wires the docker-compose macmini env to honor PROVIDER_TIMEOUT_MS
and CACHE_PUBLIC_TTL_MS overrides so production picks up the new
values without a code rebuild.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Till JS 2026-04-28 22:19:21 +02:00
parent 962606b961
commit 8a5fad34df
3 changed files with 18 additions and 6 deletions

View file

@ -510,6 +510,14 @@ services:
# as `photon-self` provider with privacy: 'local' — eligible for # as `photon-self` provider with privacy: 'local' — eligible for
# sensitive queries. Empty value = slot disabled. # sensitive queries. Empty value = slot disabled.
PHOTON_SELF_API_URL: ${PHOTON_SELF_API_URL:-} PHOTON_SELF_API_URL: ${PHOTON_SELF_API_URL:-}
# Cold-start cross-LAN fetches to photon-self consistently take
# >10s on the first probe; the 8s default false-marked it unhealthy
# on every cold path. 20s leaves headroom while still cutting off
# actually-stuck connections.
PROVIDER_TIMEOUT_MS: ${PROVIDER_TIMEOUT_MS:-20000}
# Short public-API cache TTL so a transient photon-self blip can't
# pin stale public-fallback answers in the LRU for days.
CACHE_PUBLIC_TTL_MS: ${CACHE_PUBLIC_TTL_MS:-3600000}
CORS_ORIGINS: https://mana.how,http://localhost:5173 CORS_ORIGINS: https://mana.how,http://localhost:5173
CACHE_MAX_ENTRIES: "5000" CACHE_MAX_ENTRIES: "5000"
CACHE_TTL_MS: "86400000" CACHE_TTL_MS: "86400000"

View file

@ -140,7 +140,10 @@ PORT=3018
# Default order: photon-self,photon,nominatim # Default order: photon-self,photon,nominatim
# `photon-self` is silently dropped if PHOTON_SELF_API_URL is unset. # `photon-self` is silently dropped if PHOTON_SELF_API_URL is unset.
GEOCODING_PROVIDERS=photon-self,photon,nominatim GEOCODING_PROVIDERS=photon-self,photon,nominatim
PROVIDER_TIMEOUT_MS=8000 # per-provider request timeout (cold-start safe) PROVIDER_TIMEOUT_MS=20000 # per-provider request timeout. Cold-start
# cross-LAN fetches to photon-self take
# >10s on the first probe; tighter values
# false-mark it unhealthy on every cold path.
PROVIDER_HEALTH_CACHE_MS=30000 # health-cache TTL — skip dead providers PROVIDER_HEALTH_CACHE_MS=30000 # health-cache TTL — skip dead providers
# --- Self-hosted Photon (privacy: 'local', PRIMARY since 2026-04-28) -- # --- Self-hosted Photon (privacy: 'local', PRIMARY since 2026-04-28) --

View file

@ -93,11 +93,12 @@ export function loadConfig(): Config {
'nominatim', 'nominatim',
]), ]),
healthCacheMs: parseInt(process.env.PROVIDER_HEALTH_CACHE_MS || '30000', 10), healthCacheMs: parseInt(process.env.PROVIDER_HEALTH_CACHE_MS || '30000', 10),
// 8 s default. Nominatim's cold-start DNS+TLS handshake can push the // 20 s default. Cold-start cross-LAN fetches to photon-self
// first health probe past the older 5 s default, false-marking the // (mana-gpu over WSL2 mirrored networking) consistently take
// provider unhealthy for the next 30 s. 8 s survives a slow first // >10 s on the first probe and ~2 s once warm. Tighter timeouts
// probe but still cuts off actually-stuck connections. // false-marked photon-self unhealthy on every cold path, leaking
timeoutMs: parseInt(process.env.PROVIDER_TIMEOUT_MS || '8000', 10), // to public photon for the duration of the 30 s health cache.
timeoutMs: parseInt(process.env.PROVIDER_TIMEOUT_MS || '20000', 10),
}, },
}; };
} }