From 2bbcf14abaa0de88bb5bdaab01f1c495898ba1ac Mon Sep 17 00:00:00 2001 From: Till JS Date: Tue, 28 Apr 2026 22:12:26 +0200 Subject: [PATCH] chore(geocoding): remove Pelias + close 3 bypass paths to public Nominatim MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pelias was retired from the Mac mini on 2026-04-28; photon-self (self-hosted Photon on mana-gpu) has been the live primary since then. This removes the now-dead Pelias adapter, config, tests, and the services/mana-geocoding/pelias/ stack — the entire compose file, the geojsonify_place_details.js patch, the setup.sh import script. Provider chain is now `photon-self → photon → nominatim`. The chain keeps its `privacy: 'local' | 'public'` split, sensitive-query blocking, coord quantization, and aggressive caching unchanged. Three direct calls to nominatim.openstreetmap.org that bypassed mana-geocoding now route through the wrapper: - citycorners/add-city + citycorners/cities/[slug]/add use the shared searchAddress() client (browser → same-origin proxy → mana-geocoding → photon-self). - memoro mobile drops its OSM reverse-geocoding fallback entirely; Expo's on-device reverse-geocoding stays as the sole path. Routing through the wrapper would require a memoro-server proxy endpoint — a follow-up if Expo's quality proves insufficient. Other behavioral changes: - CACHE_PUBLIC_TTL_MS dropped from 7d → 1h. The long TTL was a privacy-amplification trick from the Pelias era; with photon-self serving the bulk of traffic, a transient cross-LAN blip was pinning cached fallback answers for days. 1h gives quick recovery. - /health/pelias renamed to /health/photon-self; prometheus blackbox config + status-page generator updated. - mana-geocoding container no longer needs `extra_hosts: host.docker.internal:host-gateway` (was only there for the Pelias-on-host-network era). 113 tests passing. CLAUDE.md rewritten to reflect the post-Pelias architecture. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- apps/mana/apps/web/src/lib/geocoding/index.ts | 32 +- .../(app)/citycorners/add-city/+page.svelte | 11 +- .../cities/[slug]/add/+page.svelte | 11 +- .../api/v1/geocode/[...path]/+server.ts | 4 +- .../features/location/locationService.ts | 65 +-- docker-compose.macmini.yml | 12 +- docker/prometheus/prometheus.yml | 4 +- scripts/generate-status-page.sh | 4 +- services/mana-geocoding/CLAUDE.md | 374 ++++++++---------- .../mana-geocoding/pelias/docker-compose.yml | 121 ------ .../pelias/geojsonify_place_details.js | 123 ------ services/mana-geocoding/pelias/pelias.json | 52 --- services/mana-geocoding/pelias/setup.sh | 35 -- services/mana-geocoding/scripts/smoke-test.sh | 2 +- .../mana-geocoding/src/__tests__/app.test.ts | 3 +- services/mana-geocoding/src/app.ts | 15 +- services/mana-geocoding/src/config.ts | 28 +- services/mana-geocoding/src/index.ts | 21 +- .../src/lib/__tests__/category-map.test.ts | 184 --------- .../lib/__tests__/osm-category-map.test.ts | 6 +- services/mana-geocoding/src/lib/cache.ts | 13 +- .../mana-geocoding/src/lib/category-map.ts | 87 +--- .../src/lib/osm-category-map.ts | 14 +- services/mana-geocoding/src/lib/privacy.ts | 2 +- .../mana-geocoding/src/lib/sensitive-query.ts | 2 +- .../src/providers/__tests__/chain.test.ts | 69 ++-- .../__tests__/nominatim-normalizer.test.ts | 14 +- .../__tests__/photon-normalizer.test.ts | 4 +- .../mana-geocoding/src/providers/chain.ts | 8 +- .../mana-geocoding/src/providers/nominatim.ts | 6 +- .../mana-geocoding/src/providers/pelias.ts | 178 --------- .../mana-geocoding/src/providers/photon.ts | 16 +- .../mana-geocoding/src/providers/types.ts | 14 +- services/mana-geocoding/src/routes/geocode.ts | 8 +- services/mana-geocoding/src/routes/health.ts | 50 ++- 35 files changed, 330 insertions(+), 1262 deletions(-) delete mode 100644 services/mana-geocoding/pelias/docker-compose.yml delete mode 100644 services/mana-geocoding/pelias/geojsonify_place_details.js delete 
mode 100644 services/mana-geocoding/pelias/pelias.json delete mode 100755 services/mana-geocoding/pelias/setup.sh delete mode 100644 services/mana-geocoding/src/lib/__tests__/category-map.test.ts delete mode 100644 services/mana-geocoding/src/providers/pelias.ts diff --git a/apps/mana/apps/web/src/lib/geocoding/index.ts b/apps/mana/apps/web/src/lib/geocoding/index.ts index 0993bb9b8..52888b9d5 100644 --- a/apps/mana/apps/web/src/lib/geocoding/index.ts +++ b/apps/mana/apps/web/src/lib/geocoding/index.ts @@ -1,9 +1,11 @@ /** * Shared geocoding client for all modules in the unified Mana app. * - * Talks to our self-hosted mana-geocoding service (Pelias-backed, port 3018). - * All queries stay within our infrastructure — no user location data leaves - * the network. + * Talks to mana-geocoding (port 3018), which fronts a provider chain + * (photon-self → public photon → public nominatim) with sensitive-query + * blocking and coord quantization. Sensitive + happy-path queries stay + * on our infrastructure via photon-self; only last-resort fallbacks + * leave the network. * * Used by: places, events, contacts, photos, … * @@ -66,26 +68,24 @@ export interface GeocodingResult { longitude: number; address: GeocodingAddress; category: PlaceCategory; - /** Raw Pelias categories (food, retail, transport, …) — only present - * when the result came from Pelias. */ - peliasCategories?: string[]; confidence: number; - /** Which backend served this result. `pelias` is local; `photon` and - * `nominatim` are public APIs (the wrapper applies sensitive-query - * blocking + coord quantization before forwarding to those). */ - provider?: 'pelias' | 'photon' | 'nominatim'; + /** Which backend served this result. `photon-self` is our self-hosted + * Photon (privacy: 'local'); `photon` and `nominatim` are public APIs + * (the wrapper applies sensitive-query blocking + coord quantization + * before forwarding to those). 
*/ + provider?: 'photon-self' | 'photon' | 'nominatim'; } /** * Out-of-band information returned alongside results — the wrapper uses * this to signal *why* a query had unusual behavior: * - * - `'fallback_used'`: Pelias was unreachable, so a public-API provider - * served the request. Results are still valid but may be less precise. - * UI should show a subtle "approximate" badge. + * - `'fallback_used'`: photon-self was unreachable, so a public-API + * provider served the request. Results are still valid but may be + * less precise. UI should show a subtle "approximate" badge. * - `'sensitive_local_unavailable'`: the query matched the wrapper's * sensitive-keyword list (medical / mental-health / crisis service) - * AND the local Pelias was unreachable. The wrapper deliberately did + * AND no local provider was reachable. The wrapper deliberately did * NOT forward the query to public APIs. Results are empty by design. * UI should explain this to the user. */ @@ -95,7 +95,7 @@ interface GeocodingResponse { results: GeocodingResult[]; cached?: boolean; error?: string; - provider?: 'pelias' | 'photon' | 'nominatim'; + provider?: 'photon-self' | 'photon' | 'nominatim'; notice?: GeocodingNotice; } @@ -109,7 +109,7 @@ interface GeocodingResponse { */ export interface SearchOutcome { results: GeocodingResult[]; - provider?: 'pelias' | 'photon' | 'nominatim'; + provider?: 'photon-self' | 'photon' | 'nominatim'; notice?: GeocodingNotice; } diff --git a/apps/mana/apps/web/src/routes/(app)/citycorners/add-city/+page.svelte b/apps/mana/apps/web/src/routes/(app)/citycorners/add-city/+page.svelte index dfdb5f76c..b15e14ba0 100644 --- a/apps/mana/apps/web/src/routes/(app)/citycorners/add-city/+page.svelte +++ b/apps/mana/apps/web/src/routes/(app)/citycorners/add-city/+page.svelte @@ -6,6 +6,7 @@ import { cityTable, useAllCities } from '$lib/modules/citycorners'; import type { LocalCity } from '$lib/modules/citycorners/types'; import { RoutePage } from '$lib/components/shell'; + 
import { searchAddress } from '$lib/geocoding'; const allCities = useAllCities(); @@ -44,14 +45,10 @@ geocoding = true; try { const searchQ = country.trim() ? `${q}, ${country.trim()}` : q; - const res = await fetch( - `https://nominatim.openstreetmap.org/search?format=json&q=${encodeURIComponent(searchQ)}&limit=1`, - { headers: { 'User-Agent': 'CityCorners/1.0' } } - ); - const results = await res.json(); + const results = await searchAddress(searchQ, { limit: 1 }); if (results.length > 0) { - latitude = parseFloat(results[0].lat); - longitude = parseFloat(results[0].lon); + latitude = results[0].latitude; + longitude = results[0].longitude; } } catch { // best-effort diff --git a/apps/mana/apps/web/src/routes/(app)/citycorners/cities/[slug]/add/+page.svelte b/apps/mana/apps/web/src/routes/(app)/citycorners/cities/[slug]/add/+page.svelte index 11faa52c9..1d55981d1 100644 --- a/apps/mana/apps/web/src/routes/(app)/citycorners/cities/[slug]/add/+page.svelte +++ b/apps/mana/apps/web/src/routes/(app)/citycorners/cities/[slug]/add/+page.svelte @@ -8,6 +8,7 @@ import { ccLocationTable, CATEGORY_KEYS } from '$lib/modules/citycorners'; import type { LocalCity, LocalLocation } from '$lib/modules/citycorners/types'; import { RoutePage } from '$lib/components/shell'; + import { searchAddress } from '$lib/geocoding'; const cityCtx = getContext<{ value: LocalCity | undefined }>('currentCity'); let city = $derived(cityCtx.value); @@ -58,14 +59,10 @@ cityName && !addr.toLowerCase().includes(cityName.toLowerCase()) ? 
`${addr}, ${cityName}` : addr; - const res = await fetch( - `https://nominatim.openstreetmap.org/search?format=json&q=${encodeURIComponent(q)}&limit=1`, - { headers: { 'User-Agent': 'CityCorners/1.0' } } - ); - const results = await res.json(); + const results = await searchAddress(q, { limit: 1 }); if (results.length > 0) { - latitude = parseFloat(results[0].lat); - longitude = parseFloat(results[0].lon); + latitude = results[0].latitude; + longitude = results[0].longitude; } } catch { // Geocoding is best-effort diff --git a/apps/mana/apps/web/src/routes/api/v1/geocode/[...path]/+server.ts b/apps/mana/apps/web/src/routes/api/v1/geocode/[...path]/+server.ts index affe43163..52d9e9ee1 100644 --- a/apps/mana/apps/web/src/routes/api/v1/geocode/[...path]/+server.ts +++ b/apps/mana/apps/web/src/routes/api/v1/geocode/[...path]/+server.ts @@ -25,8 +25,8 @@ * If we ever want to rate-limit by user we can add JWT verification here * without touching the upstream service. * - * Also proxies /health and /health/pelias so the SvelteKit status page - * (/status) can check the service from its server side. + * Also proxies /health and /health/photon-self so the SvelteKit status + * page (/status) can check the service from its server side. 
*/ import { error } from '@sveltejs/kit'; diff --git a/apps/memoro/apps/mobile/features/location/locationService.ts b/apps/memoro/apps/mobile/features/location/locationService.ts index 237e14670..292ca1114 100644 --- a/apps/memoro/apps/mobile/features/location/locationService.ts +++ b/apps/memoro/apps/mobile/features/location/locationService.ts @@ -155,71 +155,26 @@ export const reverseGeocodeWithExpo = async ( }; /** - * Führt ein Reverse Geocoding mit OpenStreetMap/Nominatim durch - * @param latitude Breitengrad - * @param longitude Längengrad - * @returns Adressinformationen oder null bei Fehler - */ -export const reverseGeocodeWithOSM = async ( - latitude: number, - longitude: number -): Promise => { - try { - const url = `https://nominatim.openstreetmap.org/reverse?format=json&lat=${latitude}&lon=${longitude}&addressdetails=1`; - - const response = await fetch(url, { - headers: { - 'User-Agent': 'Memoro App', // OSM erfordert einen User-Agent - }, - }); - - if (!response.ok) { - throw new Error(`OSM API responded with status: ${response.status}`); - } - - const data = await response.json(); - - if (data && data.address) { - return { - street: data.address.road || data.address.pedestrian || data.address.street, - streetNumber: data.address.house_number, - postalCode: data.address.postcode, - city: data.address.city || data.address.town || data.address.village, - district: data.address.suburb || data.address.neighbourhood, - region: data.address.state, - country: data.address.country, - name: data.name, - formattedAddress: data.display_name, - }; - } - return null; - } catch (error) { - console.debug('Fehler beim Reverse Geocoding mit OSM:', error); - return null; - } -}; - -/** - * Führt ein Reverse Geocoding durch und versucht, die beste verfügbare Adresse zu ermitteln - * @param latitude Breitengrad - * @param longitude Längengrad - * @returns Adressinformationen oder null bei Fehler + * Führt ein Reverse Geocoding durch. 
Nutzt ausschließlich Expo's + * On-Device Reverse-Geocoding — keine direkten Calls an + * nominatim.openstreetmap.org, weil das die User-IP + Coords ungeschützt + * an einen Public-Service leakt. Wenn Expo keine Adresse liefert, + * geben wir null zurück. + * + * Falls Expo's Qualität auf Dauer nicht reicht, ist der richtige Fix + * ein Proxy-Endpoint im memoro-server, der intern an mana-geocoding + * weiterreicht (Privacy-Hardening + Photon-Self). */ export const getAddressFromCoordinates = async ( latitude: number, longitude: number ): Promise => { try { - // Zuerst mit Expo versuchen const expoResult = await reverseGeocodeWithExpo(latitude, longitude); - - // Wenn Expo ein gutes Ergebnis liefert, dieses verwenden if (expoResult && expoResult.street && expoResult.city) { return expoResult; } - - // Ansonsten mit OSM versuchen - return await reverseGeocodeWithOSM(latitude, longitude); + return expoResult; } catch (error) { console.debug('Fehler beim Reverse Geocoding:', error); return null; diff --git a/docker-compose.macmini.yml b/docker-compose.macmini.yml index b197be848..324f874e8 100644 --- a/docker-compose.macmini.yml +++ b/docker-compose.macmini.yml @@ -491,10 +491,9 @@ services: - "traefik.http.services.mana-events.loadbalancer.server.port=3065" # ─── Geocoding ─────────────────────────────────────────── - # Thin Hono wrapper in front of a self-hosted Pelias stack. - # Pelias itself (elasticsearch + api + libpostal) runs from a separate - # compose file in services/mana-geocoding/pelias/ — see - # services/mana-geocoding/CLAUDE.md for the initial import procedure. + # Thin Hono wrapper in front of self-hosted Photon (mana-gpu) with + # public photon.komoot.io and Nominatim as last-resort fallbacks. + # See services/mana-geocoding/CLAUDE.md for provider-chain details. # Internal-only: no traefik labels, not exposed via Cloudflare. 
mana-geocoding: build: @@ -504,14 +503,9 @@ services: container_name: mana-geocoding restart: always mem_limit: 128m - # Pelias runs on host network via its own compose, so the wrapper - # reaches it via host.docker.internal (Pelias API at :4000). - extra_hosts: - - "host.docker.internal:host-gateway" environment: TZ: Europe/Berlin PORT: 3018 - PELIAS_API_URL: http://host.docker.internal:4000/v1 # Self-hosted Photon on mana-gpu (cross-LAN). When set, registers # as `photon-self` provider with privacy: 'local' — eligible for # sensitive queries. Empty value = slot disabled. diff --git a/docker/prometheus/prometheus.yml b/docker/prometheus/prometheus.yml index 996ef20eb..508cd9e01 100644 --- a/docker/prometheus/prometheus.yml +++ b/docker/prometheus/prometheus.yml @@ -314,9 +314,9 @@ scrape_configs: - targets: # mana-geocoding's own health (Hono wrapper) - http://mana-geocoding:3018/health - # Upstream Pelias health, proxied through the wrapper so the + # Upstream photon-self health, proxied through the wrapper so the # blackbox-exporter doesn't need host.docker.internal access. - - http://mana-geocoding:3018/health/pelias + - http://mana-geocoding:3018/health/photon-self # mana-ai (Mission Runner) — internal-only, no CF tunnel. 
- http://mana-ai:3066/health relabel_configs: diff --git a/scripts/generate-status-page.sh b/scripts/generate-status-page.sh index c83c9598b..b79605283 100755 --- a/scripts/generate-status-page.sh +++ b/scripts/generate-status-page.sh @@ -69,8 +69,8 @@ friendly_name() { name="${name#http://}" # Interne Services (Docker-Netz): mana-geocoding:3018/health → Mana Geocoding case "$name" in - mana-geocoding:*/health/pelias) - name="Pelias (via Geocoding)" + mana-geocoding:*/health/photon-self) + name="Photon-Self (via Geocoding)" ;; mana-geocoding:*) name="Mana Geocoding" diff --git a/services/mana-geocoding/CLAUDE.md b/services/mana-geocoding/CLAUDE.md index 16bcb7cf0..342d999a2 100644 --- a/services/mana-geocoding/CLAUDE.md +++ b/services/mana-geocoding/CLAUDE.md @@ -1,6 +1,12 @@ # mana-geocoding -Geocoding service for the Places module. **Provider-chain architecture** — tries a self-hosted Pelias first, falls back to public Photon (komoot) and then public Nominatim (OSM) when Pelias is unhealthy or unreachable. All Pelias-served queries stay on our infrastructure; fallback queries leak the search string to a public OSM endpoint. +Geocoding service for the Places module and other map-aware modules. +**Provider-chain architecture** — tries self-hosted Photon (`photon-self`, +on mana-gpu) first, falls back to public Photon (komoot) and then public +Nominatim (OSM) when photon-self is unhealthy. All photon-self queries +stay on our infrastructure; fallback queries leak the search string to a +public OSM endpoint, with sensitive-query blocking + coord quantization ++ aggressive caching as privacy mitigations. ## Tech Stack @@ -8,51 +14,51 @@ Geocoding service for the Places module. 
**Provider-chain architecture** — tri |-------|------------| | **Runtime** | Bun | | **Framework** | Hono | -| **Primary geocoder** | Pelias (self-hosted, Elasticsearch-backed) | +| **Primary geocoder** | Self-hosted Photon (`photon-self`, on mana-gpu via WSL2) | | **Fallback 1** | [Photon](https://photon.komoot.io) (public, no rate limit advertised) | | **Fallback 2** | [Nominatim](https://nominatim.openstreetmap.org) (public, 1 req/sec strict) | -| **Data** | OpenStreetMap DACH extract (DE/AT/CH) for Pelias; global OSM for the public fallbacks | -| **Caching** | In-memory LRU (5000 entries, 24h TTL) — applies to all provider answers | +| **Data** | Photon-Europe pre-built index (Java JAR + embedded OpenSearch) | +| **Caching** | In-memory LRU (5000 entries; 24h for `photon-self`, 1h for public answers) | ## Port: 3018 +## Pelias has been retired + +Pelias was the original primary backend (DACH OSM index, Elasticsearch + +libpostal). It was stopped on 2026-04-28 because it ate ~3.2 GB RAM on +the Mac mini and was crushing the host into 8.6 GB swap. The provider +adapter, the JSON config patch hacks, and the entire `pelias/` stack +were removed from this repo on the same day. See +[`docs/reports/geocoding-self-hosting-2026-04-28.md`](../../docs/reports/geocoding-self-hosting-2026-04-28.md) +for the decision rationale and the migration log with WSL2 gotchas. + ## Quick Start ```bash -# 1. Start Pelias stack (first time: run setup.sh for data import) -cd services/mana-geocoding/pelias -docker compose up -d -# First time only: -chmod +x setup.sh && ./setup.sh - -# 2. Start the Hono wrapper cd services/mana-geocoding bun run dev ``` +The wrapper boots with no upstream of its own (it's a thin proxy in +front of `photon-self` + public providers). For a real local-dev hit +against `photon-self`, set `PHOTON_SELF_API_URL` to the GPU server +(e.g. `http://192.168.178.11:2322`); otherwise the chain runs on the +public providers only. 
+ ## API Endpoints -All endpoints are public (no auth required) — the service is internal-only, not exposed to the internet. +All endpoints are public (no auth required) — the service is internal-only, +not exposed to the internet. The web app reaches it via a same-origin +proxy at `apps/mana/apps/web/src/routes/api/v1/geocode/[...path]/+server.ts`. | Method | Path | Description | |--------|------|-------------| | GET | `/api/v1/geocode/search?q=...` | Forward geocoding / autocomplete | | GET | `/api/v1/geocode/reverse?lat=...&lon=...` | Reverse geocoding | -| GET | `/api/v1/geocode/stats` | Cache statistics | +| GET | `/api/v1/geocode/stats` | Cache statistics + provider snapshot | | GET | `/health` | Wrapper health | -| GET | `/health/pelias` | Upstream Pelias health (used by blackbox monitoring) | - -### Forward-search strategy - -The wrapper queries Pelias `/autocomplete` first (fast, fuzzy, optimised for -venue names like "Konzil Restaurant"). If that returns zero features, it -falls back to `/search`, which covers the address layer that autocomplete -deliberately excludes as a performance optimisation. - -This gives the best of both worlds: quick venue matches for free-text -queries AND reliable results for street-style queries like "Marktstätte -Konstanz". See `src/routes/geocode.ts` — the fallback is baked into the -forward handler. +| GET | `/health/photon-self` | Upstream `photon-self` health (used by blackbox monitoring) | +| GET | `/health/providers` | Per-provider health snapshot | ### Search params @@ -78,7 +84,7 @@ forward handler. { "results": [ { - "label": "Münster Café, Münsterplatz 3, 78462 Konstanz", + "label": "Münster Café, Münsterplatz 3, 78462 Konstanz, Deutschland", "name": "Münster Café", "latitude": 47.663, "longitude": 9.175, @@ -87,75 +93,53 @@ forward handler. 
"houseNumber": "3", "postalCode": "78462", "city": "Konstanz", - "country": "Germany" + "state": "Baden-Württemberg", + "country": "Deutschland" }, "category": "food", - "peliasCategories": ["food", "retail", "nightlife"], - "confidence": 0.95 + "confidence": 0.78, + "provider": "photon-self" } - ] + ], + "provider": "photon-self", + "tried": ["photon-self"] } ``` +The response body includes `provider: 'photon-self' | 'photon' | 'nominatim'`, +`tried: ProviderName[]`, and an optional `notice` +(`'fallback_used'` or `'sensitive_local_unavailable'`) so the caller can +render an "approximate match" hint or explain why a sensitive query +returned 0 results. + ## Category Mapping -Pelias' OSM importer tags each venue with its own taxonomy (`food`, `retail`, -`transport`, `health`, `education`, …). We collapse those into the 7 -PlaceCategories used by the Places module, using a **priority-ordered list** -so the most specific signal wins: +Photon and Nominatim emit raw OSM tags (`amenity:restaurant`, +`shop:supermarket`, `public_transport:station`, …) which we collapse +into the 7 PlaceCategories used by the Places module. Mapping logic in +`src/lib/osm-category-map.ts` — priority-ordered so the most specific +signal wins (e.g. `amenity:restaurant` → `food` even if also tagged as +`shop`). 
-| PlaceCategory | Wins if Pelias categories contain | -|---------------|-----------------------------------| -| `food` | `food` (beats retail/nightlife — a restaurant is food) | -| `transit` | `transport`, `transport:public`, `transport:air`, `transport:bus`, `transport:taxi`, `transport:sea` | -| `shopping` | `retail` (when no `food` present) | -| `leisure` | `entertainment`, `nightlife`, `recreation` | -| `work` | `education`, `professional`, `government`, `finance` | -| `other` | `health`, `religion`, everything else | +| PlaceCategory | Wins for tags | +|---------------|---------------| +| `food` | `amenity:restaurant`, `amenity:cafe`, `amenity:fast_food`, `amenity:bar`, `amenity:pub`, `amenity:bakery` | +| `transit` | `amenity:bus_station`, `public_transport:station`, `railway:station`, `aeroway:terminal`, `amenity:car_rental` | +| `shopping` | `shop` (any value) | +| `leisure` | `leisure` (most), `tourism:attraction`, `amenity:cinema`, `amenity:theatre` | +| `work` | `office`, `amenity:bank`, `amenity:townhall`, `amenity:embassy`, `amenity:school`, `amenity:university` | +| `other` | health (`amenity:hospital`, `amenity:clinic`, `healthcare:*`), religion (`amenity:place_of_worship`), addresses, fall-through | | `home` | (not auto-detected — set manually by the user) | -**Example mappings verified on the DACH index:** - -| OSM venue | Pelias categories | → PlaceCategory | -|-----------|-------------------|-----------------| -| Konzil Konstanz Restaurant | `[food, retail, nightlife]` | `food` | -| Bahnhof Konstanz | `[transport, transport:station]` | `transit` | -| Physiotherapie-Schule | `[education]` | `work` | -| MX-Park (Rennstrecke) | `[recreation]` | `leisure` | - -The priority list lives in `src/lib/category-map.ts` — update it if you want -a Pelias category to map somewhere else. 
- -### Critical: the Pelias API patch - -By default, Pelias **hides** the `category` field from API responses unless -the caller explicitly passes `?categories=...` — a quirk intended for keyword -filtering that also strips category metadata from normal address queries. We -work around this by mounting a **patched copy** of -`helper/geojsonify_place_details.js` over the upstream one in the `pelias-api` -container (`pelias/geojsonify_place_details.js`). The patch changes -`condition: checkCategoryParam` → `condition: () => true` so the category -array always flows through to the wrapper. - -If you bump the `pelias/api` image, regenerate the patched file: - -```bash -cd services/mana-geocoding/pelias -docker run --rm pelias/api:latest cat /code/pelias/api/helper/geojsonify_place_details.js \ - | sed 's|condition: checkCategoryParam|condition: () => true|' \ - > geojsonify_place_details.js -docker compose up -d --force-recreate api -``` - ## Configuration ```env PORT=3018 # --- Provider chain (tried in order) ---------------------------------- -# Default order: photon-self,pelias,photon,nominatim +# Default order: photon-self,photon,nominatim # `photon-self` is silently dropped if PHOTON_SELF_API_URL is unset. -GEOCODING_PROVIDERS=photon-self,pelias,photon,nominatim +GEOCODING_PROVIDERS=photon-self,photon,nominatim PROVIDER_TIMEOUT_MS=8000 # per-provider request timeout (cold-start safe) PROVIDER_HEALTH_CACHE_MS=30000 # health-cache TTL — skip dead providers @@ -165,13 +149,10 @@ PROVIDER_HEALTH_CACHE_MS=30000 # health-cache TTL — skip dead providers # Set in .env.macmini; flow into the container via docker-compose env. 
PHOTON_SELF_API_URL=http://192.168.178.11:2322 -# --- Pelias (legacy, currently stopped — privacy: 'local') ------------ -PELIAS_API_URL=http://pelias-api:4000/v1 - # --- Public Photon (privacy: 'public', last-resort fallback) ---------- PHOTON_API_URL=https://photon.komoot.io -# --- Nominatim (fallback 2) ------------------------------------------- +# --- Nominatim (last-resort fallback) --------------------------------- NOMINATIM_API_URL=https://nominatim.openstreetmap.org NOMINATIM_USER_AGENT=mana-geocoding/1.0 (+https://mana.how; kontakt@memoro.ai) NOMINATIM_INTERVAL_MS=1100 # >= 1000 to honor 1 req/sec policy @@ -180,7 +161,9 @@ NOMINATIM_INTERVAL_MS=1100 # >= 1000 to honor 1 req/sec policy CORS_ORIGINS=http://localhost:5173,https://mana.how CACHE_MAX_ENTRIES=5000 CACHE_TTL_MS=86400000 # 24h — used for local-provider answers -CACHE_PUBLIC_TTL_MS=604800000 # 7d — extended TTL for public-API answers (privacy) +CACHE_PUBLIC_TTL_MS=3600000 # 1h — short TTL for public-API answers so a + # transient photon-self blip doesn't pin + # stale fallback answers in cache for days. ``` To **disable a provider**, drop it from `GEOCODING_PROVIDERS`. To run with @@ -195,9 +178,7 @@ The dual-Photon split: fallback for non-sensitive queries when self-hosted is down. Both share the same `PhotonProvider` class — only the URL, name, and -privacy stance differ. See the [migration runbook](../../docs/runbooks/photon-on-mana-gpu.md) -and [decision report](../../docs/reports/geocoding-self-hosting-2026-04-28.md) -for the operational story. +privacy stance differ. ## Provider-chain semantics @@ -213,40 +194,44 @@ skips it for the rest of the cache window. The next request after the cache expires re-probes lazily — there is no background health pinger. ``` -Client (Places module) +Client (Places module, etc.) → mana-geocoding (Hono, port 3018) - → LRU cache (24h TTL) ← hit: ~0 ms + → LRU cache (24h local / 1h public) ← hit: ~0 ms → Provider chain - 1. 
Pelias ← reachable: 50–200 ms (DACH index, fully featured) - 2. Photon ← fallback: 200–500 ms public, partial features - 3. Nominatim ← last resort: 200–800 ms + 1 req/sec queue + 1. photon-self ← reachable: 50–200 ms (cross-LAN to mana-gpu) + 2. photon ← public fallback: 200–500 ms + 3. nominatim ← last resort: 200–800 ms + 1 req/sec queue ``` -The response body includes `provider: 'pelias' | 'photon' | 'nominatim'`, -`tried: ProviderName[]`, and an optional `notice` (`'fallback_used'` or -`'sensitive_local_unavailable'`) so the caller can render an -"approximate match" hint or explain why a sensitive query returned 0 -results. +### Why the public TTL is short (1h) + +When photon-self has a transient cross-LAN blip and a request falls +through to public photon, the public answer used to be cached for 7 days +— pinning the cached fallback even after photon-self recovered. With +the 1h TTL the chain returns to photon-self within an hour. The privacy +benefit of long TTLs (fewer outbound queries) is moot now that +photon-self serves the bulk of traffic; only fallback answers go through +public providers. ## Privacy hardening -When a request goes to Pelias, the user's query content + focus point -stay on our infrastructure. When it falls through to Photon or -Nominatim, the query is forwarded to a third party. Three independent -defenses limit what those third parties can learn: +When a request goes to `photon-self`, the user's query content + focus +point stay on our infrastructure. When it falls through to public +Photon or Nominatim, the query is forwarded to a third party. Three +independent defenses limit what those third parties can learn: ### 1. Sensitive-query block (`src/lib/sensitive-query.ts`) Queries matching the medical / mental-health / crisis-service keyword list (`Hausarzt`, `Psychiater`, `Klinikum`, `Suchtberatung`, `HIV`, -`Frauenhaus`, …) are **never forwarded to public APIs**, even if Pelias -is unreachable. 
The chain detects sensitivity at the route layer and -calls `chain.search(req, signal, { localOnly: true })` — providers with -`privacy: 'public'` are filtered out *before* the iteration begins, so -there is no race window. +`Frauenhaus`, …) are **never forwarded to public APIs**, even if +photon-self is unreachable. The chain detects sensitivity at the route +layer and calls `chain.search(req, signal, { localOnly: true })` — +providers with `privacy: 'public'` are filtered out *before* the +iteration begins, so there is no race window. -When no local provider is available (e.g. Pelias is stopped), a -sensitive query returns `ok: true, results: [], notice: +When no local provider is available (e.g. `PHOTON_SELF_API_URL` is +unset), a sensitive query returns `ok: true, results: [], notice: 'sensitive_local_unavailable'`. The UI should show "Diese Suche bleibt bewusst lokal — kein Treffer im DACH-Index. Versuche eine allgemeinere Formulierung." rather than "no results". @@ -267,17 +252,18 @@ Coordinates are rounded before forwarding to public providers: City-block resolution — sufficient for "what's near me?", avoids logging exact home/workplace coordinates to a third party. -Pelias always gets full-precision coordinates — quantization only -applies on the way out to public APIs. +`photon-self` always gets full-precision coordinates — quantization +only applies on the way out to public APIs. -### 3. Aggressive caching of public-API answers +### 3. Caching of public-API answers -`config.cache.publicTtlMs` (default 7 days) overrides the default 24h -cache TTL when the response came from a public provider. Same query -from 1000 different users → 1 outbound request to Photon/Nominatim. -This is the strongest privacy lever we have over public providers, -since we can't change their logging behavior — only the rate at which -we feed them queries. +`config.cache.publicTtlMs` (default 1h) overrides the default 24h cache +TTL when the response came from a public provider. 
Same query from +multiple users within an hour → 1 outbound request to Photon/Nominatim. +The TTL is short by design (see "Why the public TTL is short" above) — +the strong caching lever was an artifact of the era when public Photon +was THE fallback for a stopped Pelias; today it's a last-resort fallback +behind a healthy photon-self. ### What this protects + what it doesn't @@ -286,8 +272,8 @@ we feed them queries. | Public API sees user's IP | ✓ (wrapper is the proxy, only mac-mini IP goes out) | | Public API sees user identity / JWT | ✓ (wrapper sends no auth headers) | | Public API sees query content | partial — sensitive queries blocked entirely, others go through | -| Public API sees user's exact GPS | ✓ (quantized to ~1km / ~110m) | -| Aggregate location-intent profiling | partial — cache reduces volume ~10–100× | +| Public API sees user's exact GPS | ✓ (quantized to ~1 km / ~110 m) | +| Aggregate location-intent profiling | partial — cache reduces volume modestly | | TLS-level traffic analysis (timing) | ✗ (not in scope) | | Compelled disclosure of public-API logs | ✗ (no legal mitigation) | @@ -295,89 +281,42 @@ Residual risk for non-sensitive queries: "third party learns what queries our backend made, with timestamps, but not who made them." Acceptable for restaurant/landmark lookups, blocked for medical lookups. -## Pelias Infrastructure +## photon-self infrastructure -The Pelias stack runs as a separate docker-compose in `pelias/`: +Photon runs on **mana-gpu** (Windows 11 + WSL2 + Docker), as a Java JAR +inside `eclipse-temurin:21-jre` with the unpacked Photon-Europe data +directory (~80 GB) mounted in. Cross-LAN reachable from the Mac mini via +WSL2 mirrored networking on `192.168.178.11:2322`. 
-- **elasticsearch** — Index storage (Docker volume, ~5GB for DACH after - indexing 13.4M OSM objects — 10M addresses + 3.3M venues) -- **api** — HTTP API (port 4000), patched for category passthrough -- **libpostal** — Address parsing (internal only, not exposed on host port - because 4400 collides with mana-infra-landings on the Mac Mini) -- **Import containers** — Run once for initial data load, then stopped +Operator scripts for the weekly DB refresh live in +`services/mana-geocoding/photon-self/`: -**Production RAM usage** (measured on the Mac Mini after the 2026-04-11 deploy): +| File | Purpose | +|------|---------| +| `photon-update.sh` | Atomic-swap update script — downloads new tarball, unpacks, restarts the container, rolls back on failure. Installed on mana-gpu at `/usr/local/bin/photon-update.sh`. | +| `photon-update.service` | systemd oneshot unit that runs `photon-update.sh`. | +| `photon-update.timer` | systemd timer (Sun 03:30 + 30min jitter, `Persistent=true`). | +| `README.md` | Re-installation steps for DR scenarios + manual test commands. | -| Container | RAM | -|---|---| -| pelias-elasticsearch | ~1.2 GB | -| pelias-libpostal | ~1.9 GB (address parser model) | -| pelias-api | ~100 MB | -| mana-geocoding (wrapper) | ~20–60 MB | - -Total: **~3.2 GB** — larger than the initial ~1.5 GB estimate because -libpostal loads its full address parser into memory up front. - -### Initial import (one-time) - -The DACH PBF extract is ~5GB and takes 30-45 minutes to index. See -`pelias/setup.sh` for the full pipeline. Key steps, in order: - -1. `docker compose up -d` — bring up ES, api, libpostal -2. `docker exec pelias-elasticsearch elasticsearch-plugin install analysis-icu` - then restart — the official ES image doesn't ship `analysis-icu` which - Pelias' schema mapping requires -3. `docker compose --profile import run --rm schema ./bin/create_index` -4. 
`docker compose --profile import run --rm openstreetmap ./bin/download` - (downloads `dach-latest.osm.pbf` from Geofabrik, ~5GB) -5. **Rename** `dach-latest.osm.pbf` → `planet-latest.osm.pbf` inside the - pelias-data volume (Pelias' importer expects that filename). The - `pelias.json` config references it as `planet-latest.osm.pbf` too. -6. `docker compose --profile import run --rm openstreetmap ./bin/start` - (22M objects, ~30 min on an M2 Mac mini) - -### pelias.json gotchas - -A few non-obvious settings required for a self-hosted DACH deployment: - -- **`adminLookup.enabled: false`** — Pelias tries to resolve country/region - hierarchies via "Who's On First" data by default. We don't import WOF, - so this must be disabled or import crashes with `unable to locate sqlite - folder`. -- **`leveldbpath: "/data/leveldb"`** — not `/tmp/leveldb`; the container - user (1001) needs write access and `/tmp` is not mounted. -- **`api.services.libpostal: { url: "..." }`** — must be an object, not a - string. The API's Joi schema rejects the string form. -- **Only declare services you actually run.** We used to list `placeholder`, - `pip`, and `interpolation` in `api.services` but never ran the containers; - Pelias logged `ENOTFOUND` errors on every query. Dropping the unused - entries makes Pelias degrade cleanly to libpostal-only parsing (warns - `service disabled` once at startup, then silent). -- **No `defaultParameters.boundary.country`** — Pelias only accepts a - single country value for `boundary.country`. Since our index only - contains DACH data anyway, we drop the filter entirely. -- **`features: { filename: "planet-latest.osm.pbf" }`** — required because - Geofabrik downloads come named `dach-latest.osm.pbf`, but Pelias' - openstreetmap importer looks for `planet-latest.osm.pbf` by default. +The migration log + 5 WSL2 gotchas are documented in +[`docs/reports/geocoding-self-hosting-2026-04-28.md`](../../docs/reports/geocoding-self-hosting-2026-04-28.md). 
### Wrapper gotchas - **`idleTimeout: 60`** on `Bun.serve` — the default 10 s cuts off cold - queries that hit Elasticsearch and libpostal in sequence. 60 s is - generous for the worst case while still catching actually-stuck - connections. -- **Colima bind-mount cache.** The mac-mini bind-mounts this repo's files - into several monitoring containers. Colima on macOS sometimes serves a - stale view of a bind-mounted file even after the file on disk changes. - After editing `scripts/generate-status-page.sh` (also bind-mounted into - `mana-status-gen`), restart the consuming container so it sees the - fresh content: `docker restart mana-status-gen`. -- **`host.docker.internal` doesn't resolve from blackbox-exporter** on - Colima, so the external monitoring can't probe pelias-api or - elasticsearch directly. Instead, the wrapper exposes `/health/pelias` - which proxies a request to Pelias; Prometheus probes that internal - endpoint inside the docker network. See `prometheus.yml` job - `blackbox-internal`. + cross-LAN queries to photon-self where OpenSearch needs to recover + shards. 60 s is generous for the worst case while still catching + actually-stuck connections. +- **Cross-LAN reach is occasionally flaky.** A photon-self request + sometimes hangs for the full `PROVIDER_TIMEOUT_MS` (8 s default), which + marks the provider unhealthy for 30 s. During that window, requests + fall through to public photon. With `CACHE_PUBLIC_TTL_MS=3600000` (1h), + the cached public answers expire fast enough that the chain returns to + photon-self once it's healthy again. +- **`host.docker.internal` is no longer needed.** The Pelias era used + `extra_hosts: host.docker.internal:host-gateway` to reach Pelias on + the host network. photon-self is reached over LAN by IP, so the + docker-compose entry no longer carries `extra_hosts`. 
## Testing @@ -392,28 +331,27 @@ cd services/mana-geocoding bun test ``` -- `src/lib/__tests__/category-map.test.ts` — Pelias→PlaceCategory - priority resolution. -- `src/lib/__tests__/osm-category-map.test.ts` — raw OSM-tag→PlaceCategory - mapping used by Photon + Nominatim (since they emit `class:type` rather - than Pelias's curated taxonomy). +- `src/lib/__tests__/osm-category-map.test.ts` — raw OSM-tag → + PlaceCategory mapping (used by Photon + Nominatim). - `src/lib/__tests__/cache.test.ts` — LRU eviction order, TTL expiry, move-to-end on `get`, size tracking. - `src/lib/__tests__/rate-limiter.test.ts` — single-token rate limiter (used to enforce Nominatim's 1 req/sec policy). FIFO order, abort cleanup, busy-flag release on aborted interval-wait. -- `src/providers/__tests__/chain.test.ts` — provider chain failover, health - cache, "stop on empty results" semantics. +- `src/lib/__tests__/privacy.test.ts` — coordinate quantization edge + cases. +- `src/lib/__tests__/sensitive-query.test.ts` — keyword-list coverage. +- `src/providers/__tests__/chain.test.ts` — provider chain failover, + health cache, "stop on empty results" semantics, localOnly mode. - `src/providers/__tests__/photon-normalizer.test.ts` and - `nominatim-normalizer.test.ts` — locking the wire-format mapping for the - two public fallback providers. - -As of the 2026-04-28 privacy-hardening rollout: **141 tests, all green**. + `nominatim-normalizer.test.ts` — wire-format mapping for the two + public providers. +- `src/__tests__/app.test.ts` — `createChain()` registration tests + (photon-self opt-in via env-var, chain order honored). ### Smoke test (`bun run test:smoke`) -End-to-end curls against a running service. Requires a fully deployed -Pelias stack with the DACH index loaded — run this after a deploy to +End-to-end curls against a running service. Run after a deploy to confirm the full pipeline is healthy. 
```bash @@ -422,9 +360,9 @@ bun run test:smoke # default http://localhost:3 ./scripts/smoke-test.sh http://mana-geocoding:3018 # from another container ``` -Asserts: wrapper + pelias health, restaurant→food, station→transit, -street+locality fallback returns results, focus biasing works, reverse -geocoding for Konstanz and München, cache hit on repeat. 9 checks. +Asserts: wrapper + photon-self health, restaurant→food category, +station→transit, street/locality fallback, focus biasing, reverse +geocoding for Konstanz and München, cache hit on repeat. ## Code Layout @@ -435,22 +373,22 @@ src/ ├── config.ts # Environment config (incl. provider list) ├── routes/ │ ├── geocode.ts # Forward + reverse, delegates to chain -│ └── health.ts # /health, /health/pelias, /health/providers +│ └── health.ts # /health, /health/photon-self, /health/providers ├── providers/ │ ├── types.ts # GeocodingProvider interface, shared shape │ ├── chain.ts # Failover orchestrator + health cache -│ ├── pelias.ts # Primary: self-hosted DACH Pelias -│ ├── photon.ts # Fallback 1: photon.komoot.io -│ └── nominatim.ts # Fallback 2: nominatim.openstreetmap.org +│ ├── photon.ts # photon-self + public photon (same class, two configs) +│ └── nominatim.ts # Public nominatim.openstreetmap.org └── lib/ ├── cache.ts # LRU cache with TTL + per-entry override - ├── category-map.ts # Pelias-taxonomy → PlaceCategory + ├── category-map.ts # PlaceCategory type definition ├── osm-category-map.ts # Raw OSM `class:type` → PlaceCategory ├── privacy.ts # Coordinate quantization for public APIs ├── rate-limiter.ts # Single-token limiter (used by Nominatim) └── sensitive-query.ts # Health/crisis keyword detector -pelias/ -├── docker-compose.yml # Pelias stack -├── pelias.json # Pelias config (DACH region) -└── setup.sh # Initial data import script +photon-self/ # Operator scripts for the mana-gpu Photon +├── photon-update.sh # Atomic-swap weekly update (deployed to mana-gpu) +├── photon-update.service # systemd oneshot 
unit +├── photon-update.timer # systemd weekly timer +└── README.md # Re-install steps for DR ``` diff --git a/services/mana-geocoding/pelias/docker-compose.yml b/services/mana-geocoding/pelias/docker-compose.yml deleted file mode 100644 index f77258953..000000000 --- a/services/mana-geocoding/pelias/docker-compose.yml +++ /dev/null @@ -1,121 +0,0 @@ -# Pelias geocoding stack for mana-geocoding. -# -# Data pipeline: download → prepare → import → serve. -# See pelias/README.md for initial setup instructions. -# -# After import, only `api` and `libpostal` need to stay running. -# The import containers (placeholder, interpolation, pip, elasticsearch) -# run during import and can be stopped afterward if RAM is tight, -# but elasticsearch must stay up for queries. - -services: - # --- Always running --- - - api: - image: pelias/api:latest - container_name: pelias-api - restart: unless-stopped - ports: - - "4000:4000" - environment: - PORT: 4000 - volumes: - - ./pelias.json:/code/pelias.json:ro - # Patch: always return the `category` field in API responses, not only - # when a `categories=...` filter is present. Pelias' default - # `checkCategoryParam` hides category from results unless the caller - # filters by it, but we want the OSM taxonomy (food, retail, transport, …) - # on every venue so our Places UI can auto-map it to a PlaceCategory. - # The patched file is generated from the upstream one with - # `sed "s|condition: checkCategoryParam|condition: () => true|"` - - ./geojsonify_place_details.js:/code/pelias/api/helper/geojsonify_place_details.js:ro - depends_on: - elasticsearch: - condition: service_healthy - networks: - - pelias - - libpostal: - image: pelias/libpostal-service - container_name: pelias-libpostal - restart: unless-stopped - # No host port mapping — libpostal is an internal dependency of - # pelias-api, reached over the pelias network at libpostal:4400. 
- # Port 4400 on the host is used by mana-infra-landings (nginx for - # status.mana.how) on the production mac mini. - expose: - - "4400" - networks: - - pelias - - elasticsearch: - image: docker.elastic.co/elasticsearch/elasticsearch:7.17.1 - container_name: pelias-elasticsearch - restart: unless-stopped - ports: - - "9200:9200" - volumes: - - pelias-elasticsearch:/usr/share/elasticsearch/data - environment: - ES_JAVA_OPTS: "-Xms512m -Xmx512m" - discovery.type: single-node - xpack.security.enabled: "false" - ulimits: - memlock: - soft: -1 - hard: -1 - healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:9200/_cluster/health"] - interval: 10s - timeout: 5s - retries: 30 - networks: - - pelias - - # --- Import pipeline (run once, then stop) --- - - schema: - image: pelias/schema:latest - container_name: pelias-schema - volumes: - - ./pelias.json:/code/pelias.json:ro - depends_on: - elasticsearch: - condition: service_healthy - networks: - - pelias - profiles: ["import"] - - openstreetmap: - image: pelias/openstreetmap:latest - container_name: pelias-openstreetmap - volumes: - - ./pelias.json:/code/pelias.json:ro - - pelias-data:/data - depends_on: - elasticsearch: - condition: service_healthy - networks: - - pelias - profiles: ["import"] - - polylines: - image: pelias/polylines:latest - container_name: pelias-polylines - volumes: - - ./pelias.json:/code/pelias.json:ro - - pelias-data:/data - depends_on: - elasticsearch: - condition: service_healthy - networks: - - pelias - profiles: ["import"] - -volumes: - pelias-elasticsearch: - pelias-data: - -networks: - pelias: - driver: bridge diff --git a/services/mana-geocoding/pelias/geojsonify_place_details.js b/services/mana-geocoding/pelias/geojsonify_place_details.js deleted file mode 100644 index 6d209ddad..000000000 --- a/services/mana-geocoding/pelias/geojsonify_place_details.js +++ /dev/null @@ -1,123 +0,0 @@ -const _ = require('lodash'); -const field = require('./fieldValue'); - -// Properties to be copied 
-// If a property is identified as a single string, assume it should be presented as a string in response -// If something other than string is desired, use the following structure: { name: 'category', type: 'array' } -const DETAILS_PROPS = [ - { name: 'unit', type: 'string' }, - { name: 'housenumber', type: 'string' }, - { name: 'street', type: 'string' }, - { name: 'postalcode', type: 'string' }, - { name: 'postalcode_gid', type: 'string' }, - { name: 'confidence', type: 'default' }, - { name: 'match_type', type: 'string' }, - { name: 'distance', type: 'default' }, - { name: 'accuracy', type: 'string' }, - { name: 'country', type: 'string' }, - { name: 'country_gid', type: 'string' }, - { name: 'country_a', type: 'string' }, - { name: 'dependency', type: 'string' }, - { name: 'dependency_gid', type: 'string' }, - { name: 'dependency_a', type: 'string' }, - { name: 'macroregion', type: 'string' }, - { name: 'macroregion_gid', type: 'string' }, - { name: 'macroregion_a', type: 'string' }, - { name: 'region', type: 'string' }, - { name: 'region_gid', type: 'string' }, - { name: 'region_a', type: 'string' }, - { name: 'macrocounty', type: 'string' }, - { name: 'macrocounty_gid', type: 'string' }, - { name: 'macrocounty_a', type: 'string' }, - { name: 'county', type: 'string' }, - { name: 'county_gid', type: 'string' }, - { name: 'county_a', type: 'string' }, - { name: 'localadmin', type: 'string' }, - { name: 'localadmin_gid', type: 'string' }, - { name: 'localadmin_a', type: 'string' }, - { name: 'locality', type: 'string' }, - { name: 'locality_gid', type: 'string' }, - { name: 'locality_a', type: 'string' }, - { name: 'borough', type: 'string' }, - { name: 'borough_gid', type: 'string' }, - { name: 'borough_a', type: 'string' }, - { name: 'neighbourhood', type: 'string' }, - { name: 'neighbourhood_gid', type: 'string' }, - { name: 'continent', type: 'string' }, - { name: 'continent_gid', type: 'string' }, - { name: 'continent_a', type: 'string' }, - { name: 
'empire', type: 'string', condition: _.negate(hasCountry) }, - { name: 'empire_gid', type: 'string', condition: _.negate(hasCountry) }, - { name: 'empire_a', type: 'string', condition: _.negate(hasCountry) }, - { name: 'ocean', type: 'string' }, - { name: 'ocean_gid', type: 'string' }, - { name: 'ocean_a', type: 'string' }, - { name: 'marinearea', type: 'string' }, - { name: 'marinearea_gid', type: 'string' }, - { name: 'marinearea_a', type: 'string' }, - { name: 'bounding_box', type: 'default' }, - { name: 'label', type: 'string' }, - { name: 'category', type: 'array', condition: () => true }, -]; - -const EXTENDED_PROPS = DETAILS_PROPS.concat([ - { name: 'population', type: 'default' }, - { name: 'popularity', type: 'default' }, -]); - -// returns true IFF source a country_gid property -function hasCountry(params, source) { - return source.hasOwnProperty('country_gid'); -} - -function checkCategoryParam(params) { - return _.isObject(params) && params.hasOwnProperty('categories'); -} - -/** - * Collect the specified properties from source into an object and return it - * Ignore missing properties. 
- * - * @param {object} params clean query params - * @param {object} source - * @param {object} dst - */ -function collectProperties(params, source) { - let props = DETAILS_PROPS; - - // extended properties when debugging mode is enabled - if (params.enableDebug === true) { - props = EXTENDED_PROPS; - } - - return props.reduce((result, prop) => { - // if condition isn't met, don't set the property - if (_.isFunction(prop.condition) && !prop.condition(params, source)) { - return result; - } - - if (source.hasOwnProperty(prop.name)) { - let value = null; - - switch (prop.type) { - case 'string': - value = field.getStringValue(source[prop.name]); - break; - case 'array': - value = field.getArrayValue(source[prop.name]); - break; - // default behavior is to copy property exactly as is - default: - value = source[prop.name]; - } - - if (_.isNumber(value) || (value && !_.isEmpty(value))) { - result[prop.name] = value; - } - } - - return result; - }, {}); -} - -module.exports = collectProperties; diff --git a/services/mana-geocoding/pelias/pelias.json b/services/mana-geocoding/pelias/pelias.json deleted file mode 100644 index b06280455..000000000 --- a/services/mana-geocoding/pelias/pelias.json +++ /dev/null @@ -1,52 +0,0 @@ -{ - "esclient": { - "apiVersion": "7.x", - "hosts": [ - { - "host": "elasticsearch", - "port": 9200 - } - ] - }, - "api": { - "services": { - "libpostal": { - "url": "http://libpostal:4400" - } - }, - "defaultParameters": {} - }, - "imports": { - "adminLookup": { - "enabled": false - }, - "openstreetmap": { - "download": [ - { - "sourceURL": "https://download.geofabrik.de/europe/dach-latest.osm.pbf" - } - ], - "datapath": "/data/openstreetmap", - "leveldbpath": "/data/leveldb", - "importVenues": true, - "importAddresses": true, - "adminLookup": false, - "useAdminHierarchyLabels": false, - "leveldb": { - "cacheSize": 256 - }, - "import": [ - { - "filename": "planet-latest.osm.pbf" - } - ] - }, - "polylines": { - "datapath": "/data/polylines", - 
"files": ["extract.0sv"] - } - }, - "logger": { - "level": "info" - } -} diff --git a/services/mana-geocoding/pelias/setup.sh b/services/mana-geocoding/pelias/setup.sh deleted file mode 100755 index 68128f09f..000000000 --- a/services/mana-geocoding/pelias/setup.sh +++ /dev/null @@ -1,35 +0,0 @@ -#!/bin/bash -# Initial Pelias data import for DACH region. -# -# Run this ONCE after first docker compose up. -# Takes 30-60 minutes depending on hardware. -# -# After import, the "import" profile containers can be stopped. - -set -euo pipefail -cd "$(dirname "$0")" - -echo "=== Step 1: Create Elasticsearch schema ===" -docker compose --profile import run --rm schema ./bin/create_index - -echo "=== Step 2: Download DACH OSM data ===" -mkdir -p data/openstreetmap -docker compose --profile import run --rm openstreetmap ./bin/download - -echo "=== Step 3: Import OpenStreetMap data ===" -docker compose --profile import run --rm openstreetmap ./bin/start - -echo "=== Step 4: Import polylines (street data) ===" -docker compose --profile import run --rm polylines ./bin/download -docker compose --profile import run --rm polylines ./bin/start - -echo "" -echo "=== Import complete! 
===" -echo "Pelias API is available at http://localhost:4000/v1" -echo "" -echo "Test it:" -echo " curl 'http://localhost:4000/v1/search?text=Münsterplatz+Konstanz'" -echo " curl 'http://localhost:4000/v1/reverse?point.lat=47.663&point.lon=9.175'" -echo "" -echo "You can now stop the import containers:" -echo " docker compose --profile import stop" diff --git a/services/mana-geocoding/scripts/smoke-test.sh b/services/mana-geocoding/scripts/smoke-test.sh index ac264697f..eda2f3e85 100755 --- a/services/mana-geocoding/scripts/smoke-test.sh +++ b/services/mana-geocoding/scripts/smoke-test.sh @@ -61,7 +61,7 @@ urlenc() { echo "--- Health ---" check "wrapper health" "$BASE/health" '.status' 'ok' -check "pelias health proxy" "$BASE/health/pelias" '.status' 'ok' +check "photon-self health proxy" "$BASE/health/photon-self" '.status' 'ok' echo # --- 2. Forward geocoding --- diff --git a/services/mana-geocoding/src/__tests__/app.test.ts b/services/mana-geocoding/src/__tests__/app.test.ts index d6360b715..84c45d941 100644 --- a/services/mana-geocoding/src/__tests__/app.test.ts +++ b/services/mana-geocoding/src/__tests__/app.test.ts @@ -14,7 +14,6 @@ import type { Config } from '../config'; function baseConfig(overrides: Partial = {}): Config { return { port: 3018, - pelias: { apiUrl: 'http://127.0.0.1:1' }, photon: { apiUrl: 'https://photon.komoot.io' }, photonSelf: { apiUrl: undefined }, nominatim: { @@ -25,7 +24,7 @@ function baseConfig(overrides: Partial = {}): Config { cors: { origins: [] }, cache: { maxEntries: 100, ttlMs: 1000, publicTtlMs: 7000 }, providers: { - enabled: ['photon-self', 'pelias', 'photon', 'nominatim'], + enabled: ['photon-self', 'photon', 'nominatim'], healthCacheMs: 30_000, timeoutMs: 8000, }, diff --git a/services/mana-geocoding/src/app.ts b/services/mana-geocoding/src/app.ts index ca211d866..ac861d667 100644 --- a/services/mana-geocoding/src/app.ts +++ b/services/mana-geocoding/src/app.ts @@ -9,7 +9,6 @@ import type { Config } from './config'; 
import { RateLimiter } from './lib/rate-limiter'; import { ProviderChain } from './providers/chain'; import { NominatimProvider } from './providers/nominatim'; -import { PeliasProvider } from './providers/pelias'; import { PhotonProvider } from './providers/photon'; import type { GeocodingProvider, ProviderName } from './providers/types'; import { createGeocodeRoutes } from './routes/geocode'; @@ -47,18 +46,10 @@ export function createApp(config: Config): Hono { export function createChain(config: Config): ProviderChain { const built = new Map(); - built.set( - 'pelias', - new PeliasProvider({ - apiUrl: config.pelias.apiUrl, - timeoutMs: config.providers.timeoutMs, - }) - ); - // Self-hosted Photon (mana-gpu). Only registered when the env-var is set - // — pre-migration this stays absent and the chain falls through to - // public providers as before. Once the GPU server is running Photon, - // flip PHOTON_SELF_API_URL on and this becomes the primary provider. + // — without it the chain runs on public providers only. Once the GPU + // server is running Photon, flip PHOTON_SELF_API_URL on and this + // becomes the primary provider. if (config.photonSelf.apiUrl) { built.set( 'photon-self', diff --git a/services/mana-geocoding/src/config.ts b/services/mana-geocoding/src/config.ts index 07edfe769..ce4d9b0f1 100644 --- a/services/mana-geocoding/src/config.ts +++ b/services/mana-geocoding/src/config.ts @@ -6,10 +6,6 @@ import type { ProviderName } from './providers/types'; export interface Config { port: number; - pelias: { - /** Pelias API base URL (the API container, not the placeholder service) */ - apiUrl: string; - }; photon: { /** Photon base URL — public komoot endpoint by default. Used by * the `'photon'` provider slot which always has `privacy: 'public'`. */ @@ -20,7 +16,7 @@ export interface Config { * GPU server). When set, the wrapper registers a separate * `'photon-self'` provider with `privacy: 'local'` — eligible for * sensitive queries. 
When undefined, the slot is disabled and the - * chain only has the public providers (current pre-migration state). */ + * chain runs on public providers only. */ apiUrl: string | undefined; }; nominatim: { @@ -37,12 +33,13 @@ export interface Config { /** Max entries in the in-memory LRU cache */ maxEntries: number; /** Default TTL in milliseconds (24h — used for results from local - * providers like Pelias, where the index can be re-imported) */ + * providers like photon-self) */ ttlMs: number; - /** Extended TTL for results that came from public APIs (Photon, - * Nominatim). 7 days by default — caching aggressively reduces - * the number of times we forward query content to a third party, - * which is the main privacy lever we have over public providers. */ + /** TTL for results that came from public APIs (Photon, Nominatim). + * Defaults to 1h (override via CACHE_PUBLIC_TTL_MS) so a brief blip in photon-self can't pin stale + * public-fallback answers in the cache for days. The privacy + * benefit of long TTLs (fewer outbound queries) is moot now that + * photon-self serves the bulk of traffic. */ publicTtlMs: number; }; providers: { @@ -60,9 +57,6 @@ export interface Config { export function loadConfig(): Config { return { port: parseInt(process.env.PORT || '3018', 10), - pelias: { - apiUrl: process.env.PELIAS_API_URL || 'http://localhost:4000/v1', - }, photon: { apiUrl: process.env.PHOTON_API_URL || 'https://photon.komoot.io', }, @@ -86,17 +80,15 @@ export function loadConfig(): Config { cache: { maxEntries: parseInt(process.env.CACHE_MAX_ENTRIES || '5000', 10), ttlMs: parseInt(process.env.CACHE_TTL_MS || String(24 * 60 * 60 * 1000), 10), - publicTtlMs: parseInt(process.env.CACHE_PUBLIC_TTL_MS || String(7 * 24 * 60 * 60 * 1000), 10), + publicTtlMs: parseInt(process.env.CACHE_PUBLIC_TTL_MS || String(60 * 60 * 1000), 10), }, providers: { // Default order (when GEOCODING_PROVIDERS is unset): try the // self-hosted Photon first if it's been configured, then public // providers as fallback.
`photon-self` is silently dropped at - // chain-build time if `photonSelf.apiUrl` is undefined, so the - // list is the same shape regardless of migration status. + // chain-build time if `photonSelf.apiUrl` is undefined. enabled: parseProviderList(process.env.GEOCODING_PROVIDERS, [ 'photon-self', - 'pelias', 'photon', 'nominatim', ]), @@ -112,7 +104,7 @@ export function loadConfig(): Config { function parseProviderList(raw: string | undefined, fallback: ProviderName[]): ProviderName[] { if (!raw) return fallback; - const valid: ProviderName[] = ['pelias', 'photon-self', 'photon', 'nominatim']; + const valid: ProviderName[] = ['photon-self', 'photon', 'nominatim']; const parsed = raw .split(',') .map((s) => s.trim().toLowerCase()) diff --git a/services/mana-geocoding/src/index.ts b/services/mana-geocoding/src/index.ts index d4833e728..ae5b4b844 100644 --- a/services/mana-geocoding/src/index.ts +++ b/services/mana-geocoding/src/index.ts @@ -1,9 +1,8 @@ /** - * mana-geocoding — Self-hosted geocoding proxy. - * - * Wraps a local Pelias instance with caching and OSM → PlaceCategory - * mapping. All geocoding queries stay within our infrastructure — - * no user location data leaves the network. + * mana-geocoding — geocoding proxy with provider chain (photon-self → + * public photon → public nominatim) and aggressive caching. Sensitive + * queries are blocked from public providers; coordinates are quantized + * before being forwarded to public providers.
*/ import { createApp } from './app'; @@ -12,13 +11,17 @@ import { loadConfig } from './config'; const config = loadConfig(); console.log(`mana-geocoding starting on port ${config.port}...`); -console.log(`Pelias API: ${config.pelias.apiUrl}`); +console.log(`Providers: ${config.providers.enabled.join(', ')}`); +if (config.photonSelf.apiUrl) { + console.log(`photon-self: ${config.photonSelf.apiUrl}`); +} export default { port: config.port, - // Bun's default idleTimeout is 10s — too tight for Pelias cold queries - // that need to hit Elasticsearch and libpostal. 60s is generous enough - // for the worst-case while still cutting off stuck connections. + // Bun's default idleTimeout is 10s — too tight for cold cross-LAN + // queries to photon-self that hit OpenSearch on a fresh shard. 60s is + // generous enough for the worst case while still cutting off stuck + // connections. idleTimeout: 60, fetch: createApp(config).fetch, }; diff --git a/services/mana-geocoding/src/lib/__tests__/category-map.test.ts b/services/mana-geocoding/src/lib/__tests__/category-map.test.ts deleted file mode 100644 index 9ad9a10f2..000000000 --- a/services/mana-geocoding/src/lib/__tests__/category-map.test.ts +++ /dev/null @@ -1,184 +0,0 @@ -/** - * Unit tests for the Pelias→PlaceCategory mapping. - * - * This is the subtle part of the service: a Pelias venue often has - * multiple categories (e.g. a restaurant is `['food','retail','nightlife']`) - * and we need to pick the most specific one. The priority list in - * category-map.ts encodes that choice, and these tests lock it in. 
- */ - -import { describe, it, expect } from 'bun:test'; -import { mapPeliasToPlaceCategory } from '../category-map'; - -describe('mapPeliasToPlaceCategory', () => { - describe('priority-ordered multi-category resolution', () => { - it('picks food over retail for a restaurant', () => { - expect(mapPeliasToPlaceCategory(['food', 'retail', 'nightlife'])).toBe('food'); - }); - - it('picks food over retail for a bakery', () => { - // Bakery is tagged food+retail in the Pelias OSM taxonomy - expect(mapPeliasToPlaceCategory(['food', 'retail'])).toBe('food'); - }); - - it('picks food over nightlife for a cafe', () => { - expect(mapPeliasToPlaceCategory(['food', 'nightlife'])).toBe('food'); - }); - - it('picks transit over professional for a car_rental', () => { - // car_rental is tagged transport+professional in Pelias - expect(mapPeliasToPlaceCategory(['transport', 'professional'])).toBe('transit'); - }); - - it('picks transit for a bus_station (multiple transport subcategories)', () => { - expect(mapPeliasToPlaceCategory(['transport', 'transport:public', 'transport:bus'])).toBe( - 'transit' - ); - }); - - it('picks transit for a station (transport:rail)', () => { - expect( - mapPeliasToPlaceCategory([ - 'transport', - 'transport:public', - 'transport:station', - 'transport:rail', - ]) - ).toBe('transit'); - }); - }); - - describe('single-category resolution', () => { - it('maps food to food', () => { - expect(mapPeliasToPlaceCategory(['food'])).toBe('food'); - }); - - it('maps retail to shopping', () => { - expect(mapPeliasToPlaceCategory(['retail'])).toBe('shopping'); - }); - - it('maps transport to transit', () => { - expect(mapPeliasToPlaceCategory(['transport'])).toBe('transit'); - }); - - it('maps education to work', () => { - expect(mapPeliasToPlaceCategory(['education'])).toBe('work'); - }); - - it('maps professional to work', () => { - expect(mapPeliasToPlaceCategory(['professional'])).toBe('work'); - }); - - it('maps government to work', () => { - 
expect(mapPeliasToPlaceCategory(['government'])).toBe('work'); - }); - - it('maps finance to work', () => { - expect(mapPeliasToPlaceCategory(['finance'])).toBe('work'); - }); - - it('maps entertainment to leisure', () => { - expect(mapPeliasToPlaceCategory(['entertainment'])).toBe('leisure'); - }); - - it('maps nightlife to leisure', () => { - expect(mapPeliasToPlaceCategory(['nightlife'])).toBe('leisure'); - }); - - it('maps recreation to leisure', () => { - expect(mapPeliasToPlaceCategory(['recreation'])).toBe('leisure'); - }); - - it('maps health to other', () => { - expect(mapPeliasToPlaceCategory(['health'])).toBe('other'); - }); - - it('maps religion to other', () => { - expect(mapPeliasToPlaceCategory(['religion'])).toBe('other'); - }); - }); - - describe('real-world Pelias venue categories', () => { - // These are literal category arrays observed from the Konstanz DACH - // index during the 2026-04-11 deploy verification. Locking them in - // as regression tests so future priority changes can't silently - // break address search in production. 
- - it('Konzil Restaurant Konstanz → food', () => { - expect(mapPeliasToPlaceCategory(['food', 'retail', 'nightlife'])).toBe('food'); - }); - - it('Stuttgart Hauptbahnhof → transit', () => { - expect( - mapPeliasToPlaceCategory([ - 'transport', - 'transport:public', - 'transport:station', - 'transport:rail', - ]) - ).toBe('transit'); - }); - - it('Physiotherapie-Schule → work', () => { - expect(mapPeliasToPlaceCategory(['education'])).toBe('work'); - }); - - it('MX-Park (Rennstrecke) → leisure', () => { - expect(mapPeliasToPlaceCategory(['recreation'])).toBe('leisure'); - }); - - it('KulturKiosk → work', () => { - // KulturKiosk is tagged professional in Pelias - expect(mapPeliasToPlaceCategory(['professional'])).toBe('work'); - }); - - it('Kölner Domshop → shopping', () => { - expect(mapPeliasToPlaceCategory(['retail'])).toBe('shopping'); - }); - }); - - describe('empty / null / unknown categories', () => { - it('returns other for empty array', () => { - expect(mapPeliasToPlaceCategory([])).toBe('other'); - }); - - it('returns other for undefined', () => { - expect(mapPeliasToPlaceCategory(undefined)).toBe('other'); - }); - - it('returns other for null', () => { - expect(mapPeliasToPlaceCategory(null)).toBe('other'); - }); - - it('returns other for unknown category strings', () => { - expect(mapPeliasToPlaceCategory(['random', 'unknown'])).toBe('other'); - }); - - it('picks known category even if unknown ones come first', () => { - expect(mapPeliasToPlaceCategory(['unknown', 'food'])).toBe('food'); - }); - }); - - describe('Pelias layer fallback', () => { - it('uses layer hint for venue with no categories', () => { - expect(mapPeliasToPlaceCategory(undefined, 'venue')).toBe('other'); - }); - - it('uses layer hint for address', () => { - expect(mapPeliasToPlaceCategory(undefined, 'address')).toBe('other'); - }); - - it('uses layer hint for street', () => { - expect(mapPeliasToPlaceCategory(undefined, 'street')).toBe('other'); - }); - - it('uses layer hint for 
locality', () => { - expect(mapPeliasToPlaceCategory(undefined, 'locality')).toBe('other'); - }); - - it('prefers categories over layer hint', () => { - // A venue with food category should be food, not other - expect(mapPeliasToPlaceCategory(['food'], 'venue')).toBe('food'); - }); - }); -}); diff --git a/services/mana-geocoding/src/lib/__tests__/osm-category-map.test.ts b/services/mana-geocoding/src/lib/__tests__/osm-category-map.test.ts index 42b34a4b1..16ea96720 100644 --- a/services/mana-geocoding/src/lib/__tests__/osm-category-map.test.ts +++ b/services/mana-geocoding/src/lib/__tests__/osm-category-map.test.ts @@ -2,8 +2,6 @@ * Unit tests for the raw-OSM-tag → PlaceCategory mapper. * * Covers the cases Photon and Nominatim emit for typical DACH queries. - * The Pelias mapper has its own tests in category-map.test.ts; this file - * tests *only* the raw-OSM-tag path used by the public-API fallbacks. */ import { describe, expect, it } from 'bun:test'; @@ -54,7 +52,7 @@ describe('mapOsmTagToPlaceCategory', () => { expect(mapOsmTagToPlaceCategory('aeroway', 'aerodrome')).toBe('transit'); }); it('amenity:car_rental → transit', () => { - // Matches Pelias mapper's "car_rental → transit" decision + // car_rental → transit (transport-flavored) expect(mapOsmTagToPlaceCategory('amenity', 'car_rental')).toBe('transit'); }); }); @@ -116,7 +114,7 @@ describe('mapOsmTagToPlaceCategory', () => { describe('other (health/religion/unknown)', () => { it('amenity:hospital → other', () => { - // Health goes to other (matches Pelias mapper) + // Health goes to other expect(mapOsmTagToPlaceCategory('amenity', 'hospital')).toBe('other'); }); it('amenity:pharmacy → other', () => { diff --git a/services/mana-geocoding/src/lib/cache.ts b/services/mana-geocoding/src/lib/cache.ts index 6954312a7..c7a323e88 100644 --- a/services/mana-geocoding/src/lib/cache.ts +++ b/services/mana-geocoding/src/lib/cache.ts @@ -1,7 +1,7 @@ /** * Simple in-memory LRU cache with TTL for geocoding results. 
- * Geocoding results rarely change, so we cache aggressively to - * reduce load on the Pelias instance. + * Geocoding results rarely change, so we cache to reduce load on + * upstream providers. */ interface CacheEntry { @@ -37,11 +37,10 @@ export class LRUCache { /** * Insert or update a cache entry. * - * @param ttlOverrideMs Optional per-entry TTL. Useful when results - * from public-API providers should live longer than results from - * the (frequently-changing) local Pelias index — e.g. 7 days for - * Photon/Nominatim answers, 24 hours for Pelias answers. When - * omitted, the constructor's default TTL applies. + * @param ttlOverrideMs Optional per-entry TTL. The route layer uses + * this so public-fallback answers expire faster than local-provider + * answers — see `ttlFor()` in routes/geocode.ts. When omitted, the + * constructor's default TTL applies. */ set(key: string, value: T, ttlOverrideMs?: number): void { // Delete first so re-insert goes to end diff --git a/services/mana-geocoding/src/lib/category-map.ts b/services/mana-geocoding/src/lib/category-map.ts index 9aa1a4317..ce9fb7d5a 100644 --- a/services/mana-geocoding/src/lib/category-map.ts +++ b/services/mana-geocoding/src/lib/category-map.ts @@ -1,89 +1,10 @@ /** - * Maps Pelias categories (OSM taxonomy) to our 7 Places categories. - * - * Pelias' openstreetmap importer tags venues with categories from its - * built-in taxonomy (food, retail, transport, health, education, …). - * We collapse those into the simpler Places enum: + * The 7 Places categories used across the geocoding wrapper and clients. * * home · work · food · shopping · transit · leisure · other * - * A venue can have multiple Pelias categories (e.g. a restaurant is - * tagged `['food', 'retail', 'nightlife']`). We pick the most specific - * one in priority order rather than the first — a restaurant should be - * "food" even though "retail" also matches. 
+ * Provider-specific mappers (see `osm-category-map.ts` for Photon / + * Nominatim) collapse the upstream taxonomy into this shape. `home` is + * never auto-detected — it's set manually by the user. */ - export type PlaceCategory = 'home' | 'work' | 'food' | 'shopping' | 'transit' | 'leisure' | 'other'; - -/** - * Priority-ordered: first matching category wins. Earlier entries are - * more specific, so "food" beats "retail", "transport" beats "professional". - */ -const PELIAS_PRIORITY: Array<[string, PlaceCategory]> = [ - // Food is strongest signal — a restaurant is food, not retail - ['food', 'food'], - - // Transit/transport - ['transport:public', 'transit'], - ['transport:air', 'transit'], - ['transport:sea', 'transit'], - ['transport:bus', 'transit'], - ['transport:taxi', 'transit'], - ['transport', 'transit'], - - // Shopping — explicit retail markers - ['retail', 'shopping'], - - // Leisure / entertainment / recreation - ['entertainment', 'leisure'], - ['nightlife', 'leisure'], - ['recreation', 'leisure'], - - // Work-ish - ['education', 'work'], - ['professional', 'work'], - ['government', 'work'], - ['finance', 'work'], - - // Health/religion fall through to other - ['health', 'other'], - ['religion', 'other'], -]; - -/** - * Derive a PlaceCategory from a Pelias feature's category array. - * - * @param categories The `category` array from a Pelias feature's properties - * @param peliasLayer The Pelias layer (venue, address, street, …) — used as fallback hint - */ -export function mapPeliasToPlaceCategory( - categories?: string[] | null, - peliasLayer?: string -): PlaceCategory { - if (Array.isArray(categories) && categories.length > 0) { - // Walk our priority list and pick the first match - for (const [peliasCat, placeCat] of PELIAS_PRIORITY) { - if (categories.includes(peliasCat)) return placeCat; - } - } - - // Fallback: use Pelias layer as a hint. 
Addresses/streets/regions - // all land in "other" since they aren't really "places" in the - // categorical sense. - if (peliasLayer) { - switch (peliasLayer) { - case 'venue': - return 'other'; - case 'address': - case 'street': - return 'other'; - case 'neighbourhood': - case 'locality': - case 'region': - case 'country': - return 'other'; - } - } - - return 'other'; -} diff --git a/services/mana-geocoding/src/lib/osm-category-map.ts b/services/mana-geocoding/src/lib/osm-category-map.ts index cb7a28890..5eea54ba0 100644 --- a/services/mana-geocoding/src/lib/osm-category-map.ts +++ b/services/mana-geocoding/src/lib/osm-category-map.ts @@ -2,15 +2,9 @@ * Maps raw OSM `class:type` tags (Photon's `osm_key:osm_value`, * Nominatim's `class:type`) to our 7 PlaceCategories. * - * Pelias has a curated multi-category taxonomy (`food`, `retail`, - * `transport`, …) that we map via `category-map.ts`. Photon and Nominatim - * return raw OSM tags instead — `amenity:restaurant`, `shop:supermarket`, - * `public_transport:station`, etc. — so they need a different lookup. - * * The list below is intentionally narrow: it only covers tags we actually * see in real Photon/Nominatim responses for DACH queries. Anything else - * falls through to `other`, which matches the Pelias mapper's behavior for - * unknown categories. + * falls through to `other`. * * If a query returns a tag we don't handle, that's the signal to add it * here — not to try to enumerate all 1000+ OSM types. @@ -25,8 +19,8 @@ interface Tag { /** * Priority-ordered: first match wins. More-specific entries (with a - * `value`) come before generic key-only entries. Matches Pelias's - * "food beats retail" priority intent. + * `value`) come before generic key-only entries. Same "food beats retail" + * priority intent as the upstream taxonomies. 
*/ const OSM_RULES: Array<{ match: Tag; category: PlaceCategory }> = [ // ── Food (highest priority — restaurants are food, even when also @@ -82,7 +76,7 @@ const OSM_RULES: Array<{ match: Tag; category: PlaceCategory }> = [ { match: { key: 'amenity', value: 'embassy' }, category: 'work' }, { match: { key: 'office' }, category: 'work' }, - // ── Health / religion → other (matches Pelias mapper) ─────────── + // ── Health / religion → other ─────────────────────────────────── { match: { key: 'amenity', value: 'hospital' }, category: 'other' }, { match: { key: 'amenity', value: 'clinic' }, category: 'other' }, { match: { key: 'amenity', value: 'doctors' }, category: 'other' }, diff --git a/services/mana-geocoding/src/lib/privacy.ts b/services/mana-geocoding/src/lib/privacy.ts index 19e45e106..cbd6be263 100644 --- a/services/mana-geocoding/src/lib/privacy.ts +++ b/services/mana-geocoding/src/lib/privacy.ts @@ -14,7 +14,7 @@ * not telling Photon "user is at THIS HOUSE". Reverse geocoding * against the city block instead of the building is acceptable. * - * Pelias and other LAN-local providers always get the original + * Photon-self and other LAN-local providers always get the original * full-precision coordinates — quantization only applies on the way * out to the public internet. 
*/ diff --git a/services/mana-geocoding/src/lib/sensitive-query.ts b/services/mana-geocoding/src/lib/sensitive-query.ts index 4eee1b02f..241f82a64 100644 --- a/services/mana-geocoding/src/lib/sensitive-query.ts +++ b/services/mana-geocoding/src/lib/sensitive-query.ts @@ -12,7 +12,7 @@ * * Trade-offs: * - False positives are OK (a user searching for "Praxis Müller" who - * wanted the dance studio gets 0 results when Pelias is down — not + * wanted the dance studio gets 0 results when photon-self is down — not * ideal but better than a privacy leak) * - False negatives are NOT OK (we'd rather over-block than under-block) * - The list is intentionally narrow: only words with clear medical or diff --git a/services/mana-geocoding/src/providers/__tests__/chain.test.ts b/services/mana-geocoding/src/providers/__tests__/chain.test.ts index ab4cc6a16..c8e5884ae 100644 --- a/services/mana-geocoding/src/providers/__tests__/chain.test.ts +++ b/services/mana-geocoding/src/providers/__tests__/chain.test.ts @@ -68,7 +68,7 @@ const SEARCH: SearchRequest = { q: 'test', limit: 5, lang: 'de' }; describe('ProviderChain — happy path', () => { it('returns the first provider that succeeds', async () => { - const a = new FakeProvider('pelias'); + const a = new FakeProvider('photon-self'); const b = new FakeProvider('photon'); const chain = new ProviderChain({ providers: [a, b], @@ -76,29 +76,29 @@ describe('ProviderChain — happy path', () => { }); const res = await chain.search(SEARCH); expect(res.ok).toBe(true); - expect(res.provider).toBe('pelias'); - expect(res.tried).toEqual(['pelias']); + expect(res.provider).toBe('photon-self'); + expect(res.tried).toEqual(['photon-self']); expect(a.calls.search).toBe(1); expect(b.calls.search).toBe(0); }); it('honors the providers array order', async () => { const photon = new FakeProvider('photon'); - const pelias = new FakeProvider('pelias'); + const local = new FakeProvider('photon-self'); // photon first this time const chain = new 
ProviderChain({ - providers: [photon, pelias], + providers: [photon, local], healthCacheMs: 60_000, }); const res = await chain.search(SEARCH); expect(res.provider).toBe('photon'); - expect(pelias.calls.search).toBe(0); + expect(local.calls.search).toBe(0); }); }); describe('ProviderChain — failover', () => { it('falls through on unreachable, returns next provider', async () => { - const a = new FakeProvider('pelias', { + const a = new FakeProvider('photon-self', { search: async () => ({ ok: false, kind: 'unreachable', status: 503 }), }); const b = new FakeProvider('photon'); @@ -106,7 +106,7 @@ describe('ProviderChain — failover', () => { const res = await chain.search(SEARCH); expect(res.ok).toBe(true); expect(res.provider).toBe('photon'); - expect(res.tried).toEqual(['pelias', 'photon']); + expect(res.tried).toEqual(['photon-self', 'photon']); }); it('falls through on rate_limited', async () => { @@ -121,20 +121,20 @@ describe('ProviderChain — failover', () => { it('STOPS on empty results — does not consume fallback budget', async () => { // A clean empty answer is definitive: don't burn through public APIs. 
- const a = new FakeProvider('pelias', { + const a = new FakeProvider('photon-self', { search: async () => ({ ok: true, results: [] }), }); const b = new FakeProvider('photon'); const chain = new ProviderChain({ providers: [a, b], healthCacheMs: 60_000 }); const res = await chain.search(SEARCH); expect(res.ok).toBe(true); - expect(res.provider).toBe('pelias'); + expect(res.provider).toBe('photon-self'); expect(res.results).toEqual([]); expect(b.calls.search).toBe(0); }); it('returns ok:false when all providers fail', async () => { - const a = new FakeProvider('pelias', { + const a = new FakeProvider('photon-self', { search: async () => ({ ok: false, kind: 'unreachable' }), }); const b = new FakeProvider('photon', { @@ -144,23 +144,23 @@ describe('ProviderChain — failover', () => { const res = await chain.search(SEARCH); expect(res.ok).toBe(false); expect(res.results).toEqual([]); - expect(res.tried).toEqual(['pelias', 'photon']); + expect(res.tried).toEqual(['photon-self', 'photon']); }); }); describe('ProviderChain — health cache', () => { it('skips a provider whose health probe returned false', async () => { - const dead = new FakeProvider('pelias', { health: async () => false }); + const dead = new FakeProvider('photon-self', { health: async () => false }); const alive = new FakeProvider('photon'); const chain = new ProviderChain({ providers: [dead, alive], healthCacheMs: 60_000 }); const res = await chain.search(SEARCH); - expect(res.tried).toEqual(['photon']); // pelias was skipped, not tried + expect(res.tried).toEqual(['photon']); // local was skipped, not tried expect(dead.calls.search).toBe(0); expect(dead.calls.health).toBe(1); }); it('caches health for healthCacheMs — only one probe per window', async () => { - const a = new FakeProvider('pelias'); + const a = new FakeProvider('photon-self'); const chain = new ProviderChain({ providers: [a], healthCacheMs: 60_000 }); await chain.search(SEARCH); await chain.search(SEARCH); @@ -171,18 +171,19 @@ 
describe('ProviderChain — health cache', () => { it('marks provider unhealthy when search fails, skipping it next time', async () => { let failNext = true; - const flaky = new FakeProvider('pelias', { - search: async () => (failNext ? { ok: false, kind: 'unreachable' } : okResults('pelias')), + const flaky = new FakeProvider('photon-self', { + search: async () => + failNext ? { ok: false, kind: 'unreachable' } : okResults('photon-self'), }); const alive = new FakeProvider('photon'); const chain = new ProviderChain({ providers: [flaky, alive], healthCacheMs: 60_000 }); - // First call: pelias fails → cached unhealthy → photon serves + // First call: local fails → cached unhealthy → photon serves const r1 = await chain.search(SEARCH); expect(r1.provider).toBe('photon'); - expect(r1.tried).toEqual(['pelias', 'photon']); + expect(r1.tried).toEqual(['photon-self', 'photon']); - // Second call: pelias is in unhealthy cache, not tried at all + // Second call: local is in unhealthy cache, not tried at all failNext = false; // would now succeed but never gets called const r2 = await chain.search(SEARCH); expect(r2.provider).toBe('photon'); @@ -191,7 +192,7 @@ describe('ProviderChain — health cache', () => { }); it('refreshes health after cache expires', async () => { - const dead = new FakeProvider('pelias', { health: async () => false }); + const dead = new FakeProvider('photon-self', { health: async () => false }); const alive = new FakeProvider('photon'); // 1ms cache for fast test const chain = new ProviderChain({ providers: [dead, alive], healthCacheMs: 1 }); @@ -203,7 +204,7 @@ describe('ProviderChain — health cache', () => { }); it('clearHealthCache forces re-probe', async () => { - const a = new FakeProvider('pelias'); + const a = new FakeProvider('photon-self'); const chain = new ProviderChain({ providers: [a], healthCacheMs: 60_000 }); await chain.search(SEARCH); expect(a.calls.health).toBe(1); @@ -215,19 +216,19 @@ describe('ProviderChain — health cache', () => { 
describe('ProviderChain — getHealthSnapshot', () => { it('reports per-provider health + age', async () => { - const ok = new FakeProvider('pelias'); + const ok = new FakeProvider('photon-self'); const dead = new FakeProvider('photon', { health: async () => false }); const chain = new ProviderChain({ providers: [ok, dead], healthCacheMs: 60_000 }); await chain.search(SEARCH); const snap = chain.getHealthSnapshot(); expect(snap).toHaveLength(2); - expect(snap[0]).toMatchObject({ name: 'pelias', healthy: true }); + expect(snap[0]).toMatchObject({ name: 'photon-self', healthy: true }); expect(snap[1]).toMatchObject({ name: 'photon', healthy: false }); expect(snap[0].ageMs).toBeLessThan(1000); }); it('reports Infinity age for never-probed providers', async () => { - const a = new FakeProvider('pelias'); + const a = new FakeProvider('photon-self'); const chain = new ProviderChain({ providers: [a], healthCacheMs: 60_000 }); const snap = chain.getHealthSnapshot(); expect(snap[0].ageMs).toBe(Infinity); @@ -237,7 +238,7 @@ describe('ProviderChain — getHealthSnapshot', () => { describe('ProviderChain — reverse', () => { it('uses the same provider order for reverse', async () => { - const a = new FakeProvider('pelias', { + const a = new FakeProvider('photon-self', { reverse: async () => ({ ok: false, kind: 'unreachable' }), }); const b = new FakeProvider('photon', { privacy: 'public' }); @@ -251,26 +252,26 @@ describe('ProviderChain — reverse', () => { describe('ProviderChain — privacy / localOnly mode', () => { it('skips public providers when localOnly is true', async () => { - const localPelias = new FakeProvider('pelias', { privacy: 'local' }); + const localProvider = new FakeProvider('photon-self', { privacy: 'local' }); const publicPhoton = new FakeProvider('photon', { privacy: 'public' }); const publicNominatim = new FakeProvider('nominatim', { privacy: 'public' }); const chain = new ProviderChain({ - providers: [localPelias, publicPhoton, publicNominatim], + providers: 
[localProvider, publicPhoton, publicNominatim], healthCacheMs: 60_000, }); const res = await chain.search(SEARCH, undefined, { localOnly: true }); expect(res.ok).toBe(true); - expect(res.provider).toBe('pelias'); - expect(localPelias.calls.search).toBe(1); + expect(res.provider).toBe('photon-self'); + expect(localProvider.calls.search).toBe(1); // Public providers must not even have their search() called expect(publicPhoton.calls.search).toBe(0); expect(publicNominatim.calls.search).toBe(0); }); it('falls back to the second LOCAL provider when the first local fails', async () => { - const local1 = new FakeProvider('pelias', { + const local1 = new FakeProvider('photon-self', { privacy: 'local', search: async () => ({ ok: false, kind: 'unreachable' }), }); @@ -313,7 +314,7 @@ describe('ProviderChain — privacy / localOnly mode', () => { }); it('returns notice: fallback_used when a public provider serves a non-sensitive query', async () => { - const localDown = new FakeProvider('pelias', { + const localDown = new FakeProvider('photon-self', { privacy: 'local', health: async () => false, }); @@ -329,10 +330,10 @@ describe('ProviderChain — privacy / localOnly mode', () => { }); it('NO notice when the local provider serves a non-sensitive query', async () => { - const localUp = new FakeProvider('pelias', { privacy: 'local' }); + const localUp = new FakeProvider('photon-self', { privacy: 'local' }); const chain = new ProviderChain({ providers: [localUp], healthCacheMs: 60_000 }); const res = await chain.search(SEARCH); - expect(res.provider).toBe('pelias'); + expect(res.provider).toBe('photon-self'); expect(res.notice).toBeUndefined(); }); }); diff --git a/services/mana-geocoding/src/providers/__tests__/nominatim-normalizer.test.ts b/services/mana-geocoding/src/providers/__tests__/nominatim-normalizer.test.ts index e9bbb2559..b7d52cb1f 100644 --- a/services/mana-geocoding/src/providers/__tests__/nominatim-normalizer.test.ts +++ 
b/services/mana-geocoding/src/providers/__tests__/nominatim-normalizer.test.ts @@ -1,7 +1,7 @@ /** * Tests for normalizing Nominatim's flat-JSON shape into our GeocodingResult. * - * Nominatim differs from Photon/Pelias in three subtle ways we lock in: + * Nominatim differs from Photon in three subtle ways we lock in: * 1. Lat/lon are STRINGS, not numbers — the normalizer must parseFloat. * 2. Display name is a comma-noisy hierarchy ("Konzil, Hafenstraße, * Konstanz, Konstanz, Regierungsbezirk Freiburg, Baden-Württemberg, @@ -135,16 +135,4 @@ describe('normalizeNominatimResult', () => { }); expect(result.provider).toBe('nominatim'); }); - - it('does not set peliasCategories', () => { - // Consumer side keys off the absence of this field as a "fallback - // provider" signal. - const result = normalizeNominatimResult({ - lat: '47.0', - lon: '9.0', - class: 'amenity', - type: 'restaurant', - }); - expect(result.peliasCategories).toBeUndefined(); - }); }); diff --git a/services/mana-geocoding/src/providers/__tests__/photon-normalizer.test.ts b/services/mana-geocoding/src/providers/__tests__/photon-normalizer.test.ts index 89f59175e..877219b50 100644 --- a/services/mana-geocoding/src/providers/__tests__/photon-normalizer.test.ts +++ b/services/mana-geocoding/src/providers/__tests__/photon-normalizer.test.ts @@ -44,8 +44,6 @@ describe('normalizePhotonFeature', () => { }); expect(result.confidence).toBeCloseTo(0.78, 2); expect(result.provider).toBe('photon'); - // peliasCategories deliberately absent for non-Pelias providers - expect(result.peliasCategories).toBeUndefined(); }); it('builds label from structured fields', () => { @@ -111,7 +109,7 @@ describe('normalizePhotonFeature', () => { }); it('coordinates: Photon emits [lon, lat] — normalizer must NOT swap', () => { - // Catches the all-too-easy lon/lat flip when porting from Pelias. + // Catches the all-too-easy lon/lat flip in Photon's GeoJSON. 
const result = normalizePhotonFeature({ type: 'Feature', geometry: { type: 'Point', coordinates: [9.1758, 47.6634] }, diff --git a/services/mana-geocoding/src/providers/chain.ts b/services/mana-geocoding/src/providers/chain.ts index 07407f8c7..4e4d7985b 100644 --- a/services/mana-geocoding/src/providers/chain.ts +++ b/services/mana-geocoding/src/providers/chain.ts @@ -47,7 +47,7 @@ export type ChainNotice = /** Sensitive query was blocked from public providers and no local * provider was healthy → no results, but the absence is intentional. */ | 'sensitive_local_unavailable' - /** A non-Pelias provider served the request (Pelias was down). */ + /** A public provider served the request (the local provider was down). */ | 'fallback_used'; export interface ChainOptions { @@ -161,9 +161,9 @@ export class ProviderChain { } // Stale or missing — refresh. We don't await this aggressively in - // happy paths (Pelias up + healthy is the cheapest case), but on - // cold-start every entry is missing so the first request pays for - // one health probe per provider. + // happy paths (photon-self up + healthy is the cheapest case), + // but on cold-start every entry is missing so the first request + // pays for one health probe per provider. const healthy = await provider.health(signal); this.health.set(provider.name, { healthy, checkedAt: now }); if (!healthy) { diff --git a/services/mana-geocoding/src/providers/nominatim.ts b/services/mana-geocoding/src/providers/nominatim.ts index 590da3bc5..dfceca491 100644 --- a/services/mana-geocoding/src/providers/nominatim.ts +++ b/services/mana-geocoding/src/providers/nominatim.ts @@ -6,9 +6,9 @@ * search/reverse. A custom `User-Agent` is required (Nominatim returns * 403 to default-UA fetches). * - * Compared to Pelias/Photon, Nominatim returns a single flat array - * rather than GeoJSON. We adapt the shape and synthesize a confidence - * score from `importance`. 
+ * Unlike Photon, Nominatim returns a single flat array rather than + * GeoJSON. We adapt the shape and synthesize a confidence score from + * `importance`. * * https://nominatim.org/release-docs/develop/api/Search/ * https://operations.osmfoundation.org/policies/nominatim/ diff --git a/services/mana-geocoding/src/providers/pelias.ts b/services/mana-geocoding/src/providers/pelias.ts deleted file mode 100644 index 4c0a5d0f3..000000000 --- a/services/mana-geocoding/src/providers/pelias.ts +++ /dev/null @@ -1,178 +0,0 @@ -/** - * Pelias provider — primary backend, self-hosted with the DACH OSM index. - * - * Forward-search uses /autocomplete first (fast venue match) and falls - * back to /search if autocomplete returns zero features (autocomplete - * deliberately excludes the address layer for perf). - */ - -import { mapPeliasToPlaceCategory } from '../lib/category-map'; -import type { - GeocodingProvider, - GeocodingResult, - ProviderResponse, - ReverseRequest, - SearchRequest, -} from './types'; - -export interface PeliasConfig { - apiUrl: string; - timeoutMs: number; -} - -export class PeliasProvider implements GeocodingProvider { - readonly name = 'pelias' as const; - readonly privacy = 'local' as const; - - constructor(private readonly config: PeliasConfig) {} - - async search(req: SearchRequest, signal?: AbortSignal): Promise { - const params = new URLSearchParams({ - text: req.q.trim(), - size: String(req.limit), - lang: req.lang, - }); - if (req.focusLat && req.focusLon) { - params.set('focus.point.lat', req.focusLat); - params.set('focus.point.lon', req.focusLon); - } - - // /autocomplete first (fast venue match), then /search if empty. - // Both attempts are wrapped in the same external timeout signal so - // a cumulative slow Pelias still falls through to the next provider. 
- try { - const ac = await this.fetch(`/autocomplete?${params}`, signal); - if (!ac.ok) return { ok: false, kind: 'unreachable', status: ac.status }; - let features = ac.features; - - if (features.length === 0) { - const s = await this.fetch(`/search?${params}`, signal); - if (s.ok) features = s.features; - // /search returning a non-OK after /autocomplete returned OK-but-empty - // is a clean zero-results answer, not a fall-through. We trust the - // successful autocomplete probe. - } - - return { ok: true, results: features.map(normalizePeliasFeature) }; - } catch (e) { - return { ok: false, kind: 'unreachable', error: errorMessage(e) }; - } - } - - async reverse(req: ReverseRequest, signal?: AbortSignal): Promise { - const params = new URLSearchParams({ - 'point.lat': req.lat, - 'point.lon': req.lon, - size: '3', - lang: req.lang, - }); - - try { - const r = await this.fetch(`/reverse?${params}`, signal); - if (!r.ok) return { ok: false, kind: 'unreachable', status: r.status }; - return { ok: true, results: r.features.map(normalizePeliasFeature) }; - } catch (e) { - return { ok: false, kind: 'unreachable', error: errorMessage(e) }; - } - } - - async health(signal?: AbortSignal): Promise { - try { - const url = `${this.config.apiUrl}/status`; - const res = await fetch(url, { - signal: combineSignals(signal, AbortSignal.timeout(this.config.timeoutMs)), - }); - // /v1/status doesn't exist on every Pelias version — a 404 still - // means the server is up. Anything else (5xx, ECONNREFUSED, timeout) - // is unhealthy. 
- return res.ok || res.status === 404; - } catch { - return false; - } - } - - private async fetch( - path: string, - signal?: AbortSignal - ): Promise<{ ok: boolean; status: number; features: PeliasFeature[] }> { - const res = await fetch(`${this.config.apiUrl}${path}`, { - signal: combineSignals(signal, AbortSignal.timeout(this.config.timeoutMs)), - }); - if (!res.ok) return { ok: false, status: res.status, features: [] }; - const data = (await res.json()) as PeliasResponse; - return { ok: true, status: res.status, features: data.features ?? [] }; - } -} - -// --- Pelias native types --- - -interface PeliasResponse { - type: 'FeatureCollection'; - features: PeliasFeature[]; -} - -interface PeliasFeature { - type: 'Feature'; - geometry: { - type: 'Point'; - coordinates: [number, number]; // [lon, lat] - }; - properties: { - id?: string; - name?: string; - label?: string; - confidence?: number; - layer?: string; - street?: string; - housenumber?: string; - postalcode?: string; - locality?: string; - region?: string; - country?: string; - category?: string[]; - }; -} - -export function normalizePeliasFeature(feature: PeliasFeature): GeocodingResult { - const props = feature.properties; - const [lon, lat] = feature.geometry.coordinates; - - return { - label: props.label || props.name || '', - name: props.name || '', - latitude: lat, - longitude: lon, - address: { - street: props.street, - houseNumber: props.housenumber, - postalCode: props.postalcode, - city: props.locality, - state: props.region, - country: props.country, - }, - category: mapPeliasToPlaceCategory(props.category, props.layer), - peliasCategories: props.category, - confidence: props.confidence ?? 0, - provider: 'pelias', - }; -} - -function errorMessage(e: unknown): string { - return e instanceof Error ? e.message : String(e); -} - -/** Combine an external AbortSignal with our own timeout signal. AbortSignal.any - * exists in Bun but TS typing is patchy across runtimes — small helper. 
*/ -function combineSignals(...signals: Array): AbortSignal { - const real = signals.filter((s): s is AbortSignal => !!s); - if (real.length === 1) return real[0]; - const ctrl = new AbortController(); - for (const s of real) { - if (s.aborted) { - ctrl.abort(s.reason); - break; - } - s.addEventListener('abort', () => ctrl.abort(s.reason), { once: true }); - } - return ctrl.signal; -} diff --git a/services/mana-geocoding/src/providers/photon.ts b/services/mana-geocoding/src/providers/photon.ts index b695a4b8d..6122b0310 100644 --- a/services/mana-geocoding/src/providers/photon.ts +++ b/services/mana-geocoding/src/providers/photon.ts @@ -5,15 +5,10 @@ * importer). The HTTP shape is GeoJSON FeatureCollection with `properties` * holding `osm_key`/`osm_value` raw OSM tags + structured address fields. * - * Compared to Pelias: - * + No rate limit advertised, but be a polite neighbor: short timeouts, - * no retries, cache aggressively. - * + Reverse geocoding takes lon/lat (note the order — different from - * Pelias's point.lat/point.lon). Easy to flip if not careful. - * - No `confidence` field. We approximate from `importance` (0–1) when - * present, else 0.5 as a neutral default. - * - No DACH-specific tuning — German venue names sometimes lose umlauts - * in display labels. Acceptable for a fallback. + * Same class powers both `photon-self` (self-hosted, privacy: 'local') + * and `photon` (public komoot.io, privacy: 'public'). Reverse-geocoding + * takes lon/lat (note the order). Confidence is approximated from + * `importance` (0–1) when present, else 0.5 as a neutral default. */ import { mapOsmTagToPlaceCategory } from '../lib/osm-category-map'; @@ -207,9 +202,6 @@ export function normalizePhotonFeature( country: props.country, }, category, - // peliasCategories deliberately omitted — Photon has osm_key:osm_value - // but the consumer side keys off the absence of this field as a - // "result came from a fallback" signal. 
confidence: typeof props.importance === 'number' ? props.importance : 0.5, provider: providerName, }; diff --git a/services/mana-geocoding/src/providers/types.ts b/services/mana-geocoding/src/providers/types.ts index a57c2c861..c9e96117b 100644 --- a/services/mana-geocoding/src/providers/types.ts +++ b/services/mana-geocoding/src/providers/types.ts @@ -29,12 +29,8 @@ export interface GeocodingResult { }; /** Our Places category, derived from the provider's native taxonomy. */ category: PlaceCategory; - /** Raw Pelias categories (food, retail, transport, …) — only present - * when the result came from Pelias. Photon/Nominatim don't have an - * equivalent multi-tag taxonomy. */ - peliasCategories?: string[]; - /** Confidence score 0–1. Pelias provides this natively; Photon/Nominatim - * approximate it from `importance`. */ + /** Confidence score 0–1. Photon/Nominatim approximate it from + * `importance`. */ confidence: number; /** Which provider answered — useful for telemetry + UI hints * ("approximate match" badge for fallback providers). */ @@ -42,8 +38,8 @@ export interface GeocodingResult { } /** - * Provider identifiers. Two of these wrap the same `PhotonProvider` - * class with different configs: + * Provider identifiers. `photon-self` and `photon` both wrap the same + * `PhotonProvider` class with different configs: * * - `photon-self`: self-hosted Photon (typically on mana-gpu), * `privacy: 'local'`. Eligible for sensitive queries. @@ -55,7 +51,7 @@ export interface GeocodingResult { * tracks per-provider health. A single `photon` slot can't simultaneously * mean two different backends. 
*/ -export type ProviderName = 'pelias' | 'photon-self' | 'photon' | 'nominatim'; +export type ProviderName = 'photon-self' | 'photon' | 'nominatim'; export interface SearchRequest { q: string; diff --git a/services/mana-geocoding/src/routes/geocode.ts b/services/mana-geocoding/src/routes/geocode.ts index 24c0f7077..b15f7f506 100644 --- a/services/mana-geocoding/src/routes/geocode.ts +++ b/services/mana-geocoding/src/routes/geocode.ts @@ -21,10 +21,10 @@ interface CachedAnswer { } /** - * TTL chooser. Public-API results (Photon/Nominatim) get the longer TTL — - * caching aggressively is the main privacy lever once the query has - * already left our network. Local results stay on the shorter TTL because - * the Pelias index can be re-imported; we don't want stale local data. + * TTL chooser. Public-API results (photon/nominatim) get a shorter TTL + * (1h) so a transient blip in photon-self doesn't pin stale fallback + * answers in the cache for days. Local results (photon-self) get the + * longer 24h TTL. * * Sensitive-query notices are cached on the short TTL too (the user might * retry from a different angle quickly), and `undefined` provider (chain diff --git a/services/mana-geocoding/src/routes/health.ts b/services/mana-geocoding/src/routes/health.ts index 26ec9cc7d..f0d46e0f1 100644 --- a/services/mana-geocoding/src/routes/health.ts +++ b/services/mana-geocoding/src/routes/health.ts @@ -9,35 +9,43 @@ export function createHealthRoutes(config: Config, chain: ProviderChain) { app.get('/', (c) => c.json({ status: 'ok', service: 'mana-geocoding' })); /** - * Upstream Pelias health. Proxies a request to the Pelias API so - * monitoring can reach it without `extra_hosts: host.docker.internal` - * on the blackbox exporter. + * Upstream photon-self health. Proxies a request to the self-hosted + * Photon so monitoring can reach it without `extra_hosts: + * host.docker.internal` on the blackbox exporter. 
* - * Backwards-compatible: existing prometheus probes against this - * endpoint keep working. Now reports `degraded` (200) instead of `down` - * (503) when Pelias is unreachable but a fallback provider is healthy - * — the system can still serve queries, just slower / less precise. + * Reports `degraded` (200) instead of `down` (503) when photon-self is + * unreachable but a public fallback (photon / nominatim) is healthy — + * the system can still serve queries, just at the cost of leaking the + * query content to a third party. */ - app.get('/pelias', async (c) => { + app.get('/photon-self', async (c) => { + const upstream = config.photonSelf.apiUrl; + if (!upstream) { + return c.json({ status: 'unconfigured', error: 'PHOTON_SELF_API_URL is unset' }, 503); + } try { - const res = await fetch(`${config.pelias.apiUrl}/status`, { + const res = await fetch(`${upstream}/api?q=Konstanz&limit=1`, { signal: AbortSignal.timeout(5000), }); - if (!res.ok && res.status !== 404) { + if (!res.ok) { return c.json( - { status: 'degraded', upstream: res.status, fallbackAvailable: chainHasFallback(chain) }, - chainHasFallback(chain) ? 200 : 503 + { + status: 'degraded', + upstream: res.status, + fallbackAvailable: chainHasPublicFallback(chain), + }, + chainHasPublicFallback(chain) ? 200 : 503 ); } - return c.json({ status: 'ok', upstream: 'pelias-api' }); + return c.json({ status: 'ok', upstream: 'photon-self' }); } catch (e) { return c.json( { - status: chainHasFallback(chain) ? 'degraded' : 'down', + status: chainHasPublicFallback(chain) ? 'degraded' : 'down', error: e instanceof Error ? e.message : 'unknown', - fallbackAvailable: chainHasFallback(chain), + fallbackAvailable: chainHasPublicFallback(chain), }, - chainHasFallback(chain) ? 200 : 503 + chainHasPublicFallback(chain) ? 200 : 503 ); } }); @@ -56,10 +64,10 @@ export function createHealthRoutes(config: Config, chain: ProviderChain) { } /** - * Check if any non-Pelias provider is currently believed healthy. 
Used - * to soften /pelias health to "degraded" instead of "down" when a - * fallback can still serve traffic. + * Check if any public fallback provider is currently believed healthy. + * Used to soften /photon-self health to "degraded" instead of "down" + * when a public fallback can still serve traffic. */ -function chainHasFallback(chain: ProviderChain): boolean { - return chain.getHealthSnapshot().some((p) => p.name !== 'pelias' && p.healthy); +function chainHasPublicFallback(chain: ProviderChain): boolean { + return chain.getHealthSnapshot().some((p) => p.name !== 'photon-self' && p.healthy); }