mirror of
https://github.com/Memo-2023/mana-monorepo.git
synced 2026-05-14 16:41:08 +02:00
chore(geocoding): remove Pelias + close 3 bypass paths to public Nominatim
Pelias was retired from the Mac mini on 2026-04-28; photon-self (self-hosted Photon on mana-gpu) has been the live primary since then. This removes the now-dead Pelias adapter, config, tests, and the services/mana-geocoding/pelias/ stack — the entire compose file, the geojsonify_place_details.js patch, the setup.sh import script. Provider chain is now `photon-self → photon → nominatim`. The chain keeps its `privacy: 'local' | 'public'` split, sensitive-query blocking, coord quantization, and aggressive caching unchanged. Three direct calls to nominatim.openstreetmap.org that bypassed mana-geocoding now route through the wrapper: - citycorners/add-city + citycorners/cities/[slug]/add use the shared searchAddress() client (browser → same-origin proxy → mana-geocoding → photon-self). - memoro mobile drops its OSM reverse-geocoding fallback entirely; Expo's on-device reverse-geocoding stays as the sole path. Routing through the wrapper would require a memoro-server proxy endpoint — a follow-up if Expo's quality proves insufficient. Other behavioral changes: - CACHE_PUBLIC_TTL_MS dropped from 7d → 1h. The long TTL was a privacy-amplification trick from the Pelias era; with photon-self serving the bulk of traffic, a transient cross-LAN blip was pinning cached fallback answers for days. 1h gives quick recovery. - /health/pelias renamed to /health/photon-self; prometheus blackbox config + status-page generator updated. - mana-geocoding container no longer needs `extra_hosts: host.docker.internal:host-gateway` (was only there for the Pelias-on-host-network era). 113 tests passing. CLAUDE.md rewritten to reflect the post-Pelias architecture. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
7bca16dfa7
commit
2bbcf14aba
35 changed files with 330 additions and 1262 deletions
|
|
@ -1,9 +1,11 @@
|
|||
/**
|
||||
* Shared geocoding client for all modules in the unified Mana app.
|
||||
*
|
||||
* Talks to our self-hosted mana-geocoding service (Pelias-backed, port 3018).
|
||||
* All queries stay within our infrastructure — no user location data leaves
|
||||
* the network.
|
||||
* Talks to mana-geocoding (port 3018), which fronts a provider chain
|
||||
* (photon-self → public photon → public nominatim) with sensitive-query
|
||||
* blocking and coord quantization. Sensitive + happy-path queries stay
|
||||
* on our infrastructure via photon-self; only last-resort fallbacks
|
||||
* leave the network.
|
||||
*
|
||||
* Used by: places, events, contacts, photos, …
|
||||
*
|
||||
|
|
@ -66,26 +68,24 @@ export interface GeocodingResult {
|
|||
longitude: number;
|
||||
address: GeocodingAddress;
|
||||
category: PlaceCategory;
|
||||
/** Raw Pelias categories (food, retail, transport, …) — only present
|
||||
* when the result came from Pelias. */
|
||||
peliasCategories?: string[];
|
||||
confidence: number;
|
||||
/** Which backend served this result. `pelias` is local; `photon` and
|
||||
* `nominatim` are public APIs (the wrapper applies sensitive-query
|
||||
* blocking + coord quantization before forwarding to those). */
|
||||
provider?: 'pelias' | 'photon' | 'nominatim';
|
||||
/** Which backend served this result. `photon-self` is our self-hosted
|
||||
* Photon (privacy: 'local'); `photon` and `nominatim` are public APIs
|
||||
* (the wrapper applies sensitive-query blocking + coord quantization
|
||||
* before forwarding to those). */
|
||||
provider?: 'photon-self' | 'photon' | 'nominatim';
|
||||
}
|
||||
|
||||
/**
|
||||
* Out-of-band information returned alongside results — the wrapper uses
|
||||
* this to signal *why* a query had unusual behavior:
|
||||
*
|
||||
* - `'fallback_used'`: Pelias was unreachable, so a public-API provider
|
||||
* served the request. Results are still valid but may be less precise.
|
||||
* UI should show a subtle "approximate" badge.
|
||||
* - `'fallback_used'`: photon-self was unreachable, so a public-API
|
||||
* provider served the request. Results are still valid but may be
|
||||
* less precise. UI should show a subtle "approximate" badge.
|
||||
* - `'sensitive_local_unavailable'`: the query matched the wrapper's
|
||||
* sensitive-keyword list (medical / mental-health / crisis service)
|
||||
* AND the local Pelias was unreachable. The wrapper deliberately did
|
||||
* AND no local provider was reachable. The wrapper deliberately did
|
||||
* NOT forward the query to public APIs. Results are empty by design.
|
||||
* UI should explain this to the user.
|
||||
*/
|
||||
|
|
@ -95,7 +95,7 @@ interface GeocodingResponse {
|
|||
results: GeocodingResult[];
|
||||
cached?: boolean;
|
||||
error?: string;
|
||||
provider?: 'pelias' | 'photon' | 'nominatim';
|
||||
provider?: 'photon-self' | 'photon' | 'nominatim';
|
||||
notice?: GeocodingNotice;
|
||||
}
|
||||
|
||||
|
|
@ -109,7 +109,7 @@ interface GeocodingResponse {
|
|||
*/
|
||||
export interface SearchOutcome {
|
||||
results: GeocodingResult[];
|
||||
provider?: 'pelias' | 'photon' | 'nominatim';
|
||||
provider?: 'photon-self' | 'photon' | 'nominatim';
|
||||
notice?: GeocodingNotice;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@
|
|||
import { cityTable, useAllCities } from '$lib/modules/citycorners';
|
||||
import type { LocalCity } from '$lib/modules/citycorners/types';
|
||||
import { RoutePage } from '$lib/components/shell';
|
||||
import { searchAddress } from '$lib/geocoding';
|
||||
|
||||
const allCities = useAllCities();
|
||||
|
||||
|
|
@ -44,14 +45,10 @@
|
|||
geocoding = true;
|
||||
try {
|
||||
const searchQ = country.trim() ? `${q}, ${country.trim()}` : q;
|
||||
const res = await fetch(
|
||||
`https://nominatim.openstreetmap.org/search?format=json&q=${encodeURIComponent(searchQ)}&limit=1`,
|
||||
{ headers: { 'User-Agent': 'CityCorners/1.0' } }
|
||||
);
|
||||
const results = await res.json();
|
||||
const results = await searchAddress(searchQ, { limit: 1 });
|
||||
if (results.length > 0) {
|
||||
latitude = parseFloat(results[0].lat);
|
||||
longitude = parseFloat(results[0].lon);
|
||||
latitude = results[0].latitude;
|
||||
longitude = results[0].longitude;
|
||||
}
|
||||
} catch {
|
||||
// best-effort
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@
|
|||
import { ccLocationTable, CATEGORY_KEYS } from '$lib/modules/citycorners';
|
||||
import type { LocalCity, LocalLocation } from '$lib/modules/citycorners/types';
|
||||
import { RoutePage } from '$lib/components/shell';
|
||||
import { searchAddress } from '$lib/geocoding';
|
||||
|
||||
const cityCtx = getContext<{ value: LocalCity | undefined }>('currentCity');
|
||||
let city = $derived(cityCtx.value);
|
||||
|
|
@ -58,14 +59,10 @@
|
|||
cityName && !addr.toLowerCase().includes(cityName.toLowerCase())
|
||||
? `${addr}, ${cityName}`
|
||||
: addr;
|
||||
const res = await fetch(
|
||||
`https://nominatim.openstreetmap.org/search?format=json&q=${encodeURIComponent(q)}&limit=1`,
|
||||
{ headers: { 'User-Agent': 'CityCorners/1.0' } }
|
||||
);
|
||||
const results = await res.json();
|
||||
const results = await searchAddress(q, { limit: 1 });
|
||||
if (results.length > 0) {
|
||||
latitude = parseFloat(results[0].lat);
|
||||
longitude = parseFloat(results[0].lon);
|
||||
latitude = results[0].latitude;
|
||||
longitude = results[0].longitude;
|
||||
}
|
||||
} catch {
|
||||
// Geocoding is best-effort
|
||||
|
|
|
|||
|
|
@ -25,8 +25,8 @@
|
|||
* If we ever want to rate-limit by user we can add JWT verification here
|
||||
* without touching the upstream service.
|
||||
*
|
||||
* Also proxies /health and /health/pelias so the SvelteKit status page
|
||||
* (/status) can check the service from its server side.
|
||||
* Also proxies /health and /health/photon-self so the SvelteKit status
|
||||
* page (/status) can check the service from its server side.
|
||||
*/
|
||||
|
||||
import { error } from '@sveltejs/kit';
|
||||
|
|
|
|||
|
|
@ -155,71 +155,26 @@ export const reverseGeocodeWithExpo = async (
|
|||
};
|
||||
|
||||
/**
|
||||
* Führt ein Reverse Geocoding mit OpenStreetMap/Nominatim durch
|
||||
* @param latitude Breitengrad
|
||||
* @param longitude Längengrad
|
||||
* @returns Adressinformationen oder null bei Fehler
|
||||
*/
|
||||
export const reverseGeocodeWithOSM = async (
|
||||
latitude: number,
|
||||
longitude: number
|
||||
): Promise<AddressInfo | null> => {
|
||||
try {
|
||||
const url = `https://nominatim.openstreetmap.org/reverse?format=json&lat=${latitude}&lon=${longitude}&addressdetails=1`;
|
||||
|
||||
const response = await fetch(url, {
|
||||
headers: {
|
||||
'User-Agent': 'Memoro App', // OSM erfordert einen User-Agent
|
||||
},
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
throw new Error(`OSM API responded with status: ${response.status}`);
|
||||
}
|
||||
|
||||
const data = await response.json();
|
||||
|
||||
if (data && data.address) {
|
||||
return {
|
||||
street: data.address.road || data.address.pedestrian || data.address.street,
|
||||
streetNumber: data.address.house_number,
|
||||
postalCode: data.address.postcode,
|
||||
city: data.address.city || data.address.town || data.address.village,
|
||||
district: data.address.suburb || data.address.neighbourhood,
|
||||
region: data.address.state,
|
||||
country: data.address.country,
|
||||
name: data.name,
|
||||
formattedAddress: data.display_name,
|
||||
};
|
||||
}
|
||||
return null;
|
||||
} catch (error) {
|
||||
console.debug('Fehler beim Reverse Geocoding mit OSM:', error);
|
||||
return null;
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Führt ein Reverse Geocoding durch und versucht, die beste verfügbare Adresse zu ermitteln
|
||||
* @param latitude Breitengrad
|
||||
* @param longitude Längengrad
|
||||
* @returns Adressinformationen oder null bei Fehler
|
||||
* Führt ein Reverse Geocoding durch. Nutzt ausschließlich Expo's
|
||||
* On-Device Reverse-Geocoding — keine direkten Calls an
|
||||
* nominatim.openstreetmap.org, weil das die User-IP + Coords ungeschützt
|
||||
* an einen Public-Service leakt. Wenn Expo keine Adresse liefert,
|
||||
* geben wir null zurück.
|
||||
*
|
||||
* Falls Expo's Qualität auf Dauer nicht reicht, ist der richtige Fix
|
||||
* ein Proxy-Endpoint im memoro-server, der intern an mana-geocoding
|
||||
* weiterreicht (Privacy-Hardening + Photon-Self).
|
||||
*/
|
||||
export const getAddressFromCoordinates = async (
|
||||
latitude: number,
|
||||
longitude: number
|
||||
): Promise<AddressInfo | null> => {
|
||||
try {
|
||||
// Zuerst mit Expo versuchen
|
||||
const expoResult = await reverseGeocodeWithExpo(latitude, longitude);
|
||||
|
||||
// Wenn Expo ein gutes Ergebnis liefert, dieses verwenden
|
||||
if (expoResult && expoResult.street && expoResult.city) {
|
||||
return expoResult;
|
||||
}
|
||||
|
||||
// Ansonsten mit OSM versuchen
|
||||
return await reverseGeocodeWithOSM(latitude, longitude);
|
||||
return expoResult;
|
||||
} catch (error) {
|
||||
console.debug('Fehler beim Reverse Geocoding:', error);
|
||||
return null;
|
||||
|
|
|
|||
|
|
@ -491,10 +491,9 @@ services:
|
|||
- "traefik.http.services.mana-events.loadbalancer.server.port=3065"
|
||||
|
||||
# ─── Geocoding ───────────────────────────────────────────
|
||||
# Thin Hono wrapper in front of a self-hosted Pelias stack.
|
||||
# Pelias itself (elasticsearch + api + libpostal) runs from a separate
|
||||
# compose file in services/mana-geocoding/pelias/ — see
|
||||
# services/mana-geocoding/CLAUDE.md for the initial import procedure.
|
||||
# Thin Hono wrapper in front of self-hosted Photon (mana-gpu) with
|
||||
# public photon.komoot.io and Nominatim as last-resort fallbacks.
|
||||
# See services/mana-geocoding/CLAUDE.md for provider-chain details.
|
||||
# Internal-only: no traefik labels, not exposed via Cloudflare.
|
||||
mana-geocoding:
|
||||
build:
|
||||
|
|
@ -504,14 +503,9 @@ services:
|
|||
container_name: mana-geocoding
|
||||
restart: always
|
||||
mem_limit: 128m
|
||||
# Pelias runs on host network via its own compose, so the wrapper
|
||||
# reaches it via host.docker.internal (Pelias API at :4000).
|
||||
extra_hosts:
|
||||
- "host.docker.internal:host-gateway"
|
||||
environment:
|
||||
TZ: Europe/Berlin
|
||||
PORT: 3018
|
||||
PELIAS_API_URL: http://host.docker.internal:4000/v1
|
||||
# Self-hosted Photon on mana-gpu (cross-LAN). When set, registers
|
||||
# as `photon-self` provider with privacy: 'local' — eligible for
|
||||
# sensitive queries. Empty value = slot disabled.
|
||||
|
|
|
|||
|
|
@ -314,9 +314,9 @@ scrape_configs:
|
|||
- targets:
|
||||
# mana-geocoding's own health (Hono wrapper)
|
||||
- http://mana-geocoding:3018/health
|
||||
# Upstream Pelias health, proxied through the wrapper so the
|
||||
# Upstream photon-self health, proxied through the wrapper so the
|
||||
# blackbox-exporter doesn't need direct cross-LAN access to mana-gpu.
|
||||
- http://mana-geocoding:3018/health/pelias
|
||||
- http://mana-geocoding:3018/health/photon-self
|
||||
# mana-ai (Mission Runner) — internal-only, no CF tunnel.
|
||||
- http://mana-ai:3066/health
|
||||
relabel_configs:
|
||||
|
|
|
|||
|
|
@ -69,8 +69,8 @@ friendly_name() {
|
|||
name="${name#http://}"
|
||||
# Interne Services (Docker-Netz): mana-geocoding:3018/health → Mana Geocoding
|
||||
case "$name" in
|
||||
mana-geocoding:*/health/pelias)
|
||||
name="Pelias (via Geocoding)"
|
||||
mana-geocoding:*/health/photon-self)
|
||||
name="Photon-Self (via Geocoding)"
|
||||
;;
|
||||
mana-geocoding:*)
|
||||
name="Mana Geocoding"
|
||||
|
|
|
|||
|
|
@ -1,6 +1,12 @@
|
|||
# mana-geocoding
|
||||
|
||||
Geocoding service for the Places module. **Provider-chain architecture** — tries a self-hosted Pelias first, falls back to public Photon (komoot) and then public Nominatim (OSM) when Pelias is unhealthy or unreachable. All Pelias-served queries stay on our infrastructure; fallback queries leak the search string to a public OSM endpoint.
|
||||
Geocoding service for the Places module and other map-aware modules.
|
||||
**Provider-chain architecture** — tries self-hosted Photon (`photon-self`,
|
||||
on mana-gpu) first, falls back to public Photon (komoot) and then public
|
||||
Nominatim (OSM) when photon-self is unhealthy. All photon-self queries
|
||||
stay on our infrastructure; fallback queries leak the search string to a
|
||||
public OSM endpoint, with sensitive-query blocking + coord quantization
|
||||
and short-TTL caching of public answers as privacy mitigations.
|
||||
|
||||
## Tech Stack
|
||||
|
||||
|
|
@ -8,51 +14,51 @@ Geocoding service for the Places module. **Provider-chain architecture** — tri
|
|||
|-------|------------|
|
||||
| **Runtime** | Bun |
|
||||
| **Framework** | Hono |
|
||||
| **Primary geocoder** | Pelias (self-hosted, Elasticsearch-backed) |
|
||||
| **Primary geocoder** | Self-hosted Photon (`photon-self`, on mana-gpu via WSL2) |
|
||||
| **Fallback 1** | [Photon](https://photon.komoot.io) (public, no rate limit advertised) |
|
||||
| **Fallback 2** | [Nominatim](https://nominatim.openstreetmap.org) (public, 1 req/sec strict) |
|
||||
| **Data** | OpenStreetMap DACH extract (DE/AT/CH) for Pelias; global OSM for the public fallbacks |
|
||||
| **Caching** | In-memory LRU (5000 entries, 24h TTL) — applies to all provider answers |
|
||||
| **Data** | Photon-Europe pre-built index (Java JAR + embedded OpenSearch) |
|
||||
| **Caching** | In-memory LRU (5000 entries; 24h for `photon-self`, 1h for public answers) |
|
||||
|
||||
## Port: 3018
|
||||
|
||||
## Pelias has been retired
|
||||
|
||||
Pelias was the original primary backend (DACH OSM index, Elasticsearch +
|
||||
libpostal). It was stopped on 2026-04-28 because it ate ~3.2 GB RAM on
|
||||
the Mac mini and was crushing the host into 8.6 GB swap. The provider
|
||||
adapter, the JSON config patch hacks, and the entire `pelias/` stack
|
||||
were removed from this repo on the same day. See
|
||||
[`docs/reports/geocoding-self-hosting-2026-04-28.md`](../../docs/reports/geocoding-self-hosting-2026-04-28.md)
|
||||
for the decision rationale and the migration log with WSL2 gotchas.
|
||||
|
||||
## Quick Start
|
||||
|
||||
```bash
|
||||
# 1. Start Pelias stack (first time: run setup.sh for data import)
|
||||
cd services/mana-geocoding/pelias
|
||||
docker compose up -d
|
||||
# First time only:
|
||||
chmod +x setup.sh && ./setup.sh
|
||||
|
||||
# 2. Start the Hono wrapper
|
||||
cd services/mana-geocoding
|
||||
bun run dev
|
||||
```
|
||||
|
||||
The wrapper boots with no upstream of its own (it's a thin proxy in
|
||||
front of `photon-self` + public providers). For a real local-dev hit
|
||||
against `photon-self`, set `PHOTON_SELF_API_URL` to the GPU server
|
||||
(e.g. `http://192.168.178.11:2322`); otherwise the chain runs on the
|
||||
public providers only.
|
||||
|
||||
## API Endpoints
|
||||
|
||||
All endpoints are public (no auth required) — the service is internal-only, not exposed to the internet.
|
||||
All endpoints are unauthenticated (no auth required) — the service is internal-only,
|
||||
not exposed to the internet. The web app reaches it via a same-origin
|
||||
proxy at `apps/mana/apps/web/src/routes/api/v1/geocode/[...path]/+server.ts`.
|
||||
|
||||
| Method | Path | Description |
|
||||
|--------|------|-------------|
|
||||
| GET | `/api/v1/geocode/search?q=...` | Forward geocoding / autocomplete |
|
||||
| GET | `/api/v1/geocode/reverse?lat=...&lon=...` | Reverse geocoding |
|
||||
| GET | `/api/v1/geocode/stats` | Cache statistics |
|
||||
| GET | `/api/v1/geocode/stats` | Cache statistics + provider snapshot |
|
||||
| GET | `/health` | Wrapper health |
|
||||
| GET | `/health/pelias` | Upstream Pelias health (used by blackbox monitoring) |
|
||||
|
||||
### Forward-search strategy
|
||||
|
||||
The wrapper queries Pelias `/autocomplete` first (fast, fuzzy, optimised for
|
||||
venue names like "Konzil Restaurant"). If that returns zero features, it
|
||||
falls back to `/search`, which covers the address layer that autocomplete
|
||||
deliberately excludes as a performance optimisation.
|
||||
|
||||
This gives the best of both worlds: quick venue matches for free-text
|
||||
queries AND reliable results for street-style queries like "Marktstätte
|
||||
Konstanz". See `src/routes/geocode.ts` — the fallback is baked into the
|
||||
forward handler.
|
||||
| GET | `/health/photon-self` | Upstream `photon-self` health (used by blackbox monitoring) |
|
||||
| GET | `/health/providers` | Per-provider health snapshot |
|
||||
|
||||
### Search params
|
||||
|
||||
|
|
@ -78,7 +84,7 @@ forward handler.
|
|||
{
|
||||
"results": [
|
||||
{
|
||||
"label": "Münster Café, Münsterplatz 3, 78462 Konstanz",
|
||||
"label": "Münster Café, Münsterplatz 3, 78462 Konstanz, Deutschland",
|
||||
"name": "Münster Café",
|
||||
"latitude": 47.663,
|
||||
"longitude": 9.175,
|
||||
|
|
@ -87,75 +93,53 @@ forward handler.
|
|||
"houseNumber": "3",
|
||||
"postalCode": "78462",
|
||||
"city": "Konstanz",
|
||||
"country": "Germany"
|
||||
"state": "Baden-Württemberg",
|
||||
"country": "Deutschland"
|
||||
},
|
||||
"category": "food",
|
||||
"peliasCategories": ["food", "retail", "nightlife"],
|
||||
"confidence": 0.95
|
||||
"confidence": 0.78,
|
||||
"provider": "photon-self"
|
||||
}
|
||||
]
|
||||
],
|
||||
"provider": "photon-self",
|
||||
"tried": ["photon-self"]
|
||||
}
|
||||
```
|
||||
|
||||
The response body includes `provider: 'photon-self' | 'photon' | 'nominatim'`,
|
||||
`tried: ProviderName[]`, and an optional `notice`
|
||||
(`'fallback_used'` or `'sensitive_local_unavailable'`) so the caller can
|
||||
render an "approximate match" hint or explain why a sensitive query
|
||||
returned 0 results.
|
||||
|
||||
## Category Mapping
|
||||
|
||||
Pelias' OSM importer tags each venue with its own taxonomy (`food`, `retail`,
|
||||
`transport`, `health`, `education`, …). We collapse those into the 7
|
||||
PlaceCategories used by the Places module, using a **priority-ordered list**
|
||||
so the most specific signal wins:
|
||||
Photon and Nominatim emit raw OSM tags (`amenity:restaurant`,
|
||||
`shop:supermarket`, `public_transport:station`, …) which we collapse
|
||||
into the 7 PlaceCategories used by the Places module. Mapping logic in
|
||||
`src/lib/osm-category-map.ts` — priority-ordered so the most specific
|
||||
signal wins (e.g. `amenity:restaurant` → `food` even if also tagged as
|
||||
`shop`).
|
||||
|
||||
| PlaceCategory | Wins if Pelias categories contain |
|
||||
|---------------|-----------------------------------|
|
||||
| `food` | `food` (beats retail/nightlife — a restaurant is food) |
|
||||
| `transit` | `transport`, `transport:public`, `transport:air`, `transport:bus`, `transport:taxi`, `transport:sea` |
|
||||
| `shopping` | `retail` (when no `food` present) |
|
||||
| `leisure` | `entertainment`, `nightlife`, `recreation` |
|
||||
| `work` | `education`, `professional`, `government`, `finance` |
|
||||
| `other` | `health`, `religion`, everything else |
|
||||
| PlaceCategory | Wins for tags |
|
||||
|---------------|---------------|
|
||||
| `food` | `amenity:restaurant`, `amenity:cafe`, `amenity:fast_food`, `amenity:bar`, `amenity:pub`, `amenity:bakery` |
|
||||
| `transit` | `amenity:bus_station`, `public_transport:station`, `railway:station`, `aeroway:terminal`, `amenity:car_rental` |
|
||||
| `shopping` | `shop` (any value) |
|
||||
| `leisure` | `leisure` (most), `tourism:attraction`, `amenity:cinema`, `amenity:theatre` |
|
||||
| `work` | `office`, `amenity:bank`, `amenity:townhall`, `amenity:embassy`, `amenity:school`, `amenity:university` |
|
||||
| `other` | health (`amenity:hospital`, `amenity:clinic`, `healthcare:*`), religion (`amenity:place_of_worship`), addresses, fall-through |
|
||||
| `home` | (not auto-detected — set manually by the user) |
|
||||
|
||||
**Example mappings verified on the DACH index:**
|
||||
|
||||
| OSM venue | Pelias categories | → PlaceCategory |
|
||||
|-----------|-------------------|-----------------|
|
||||
| Konzil Konstanz Restaurant | `[food, retail, nightlife]` | `food` |
|
||||
| Bahnhof Konstanz | `[transport, transport:station]` | `transit` |
|
||||
| Physiotherapie-Schule | `[education]` | `work` |
|
||||
| MX-Park (Rennstrecke) | `[recreation]` | `leisure` |
|
||||
|
||||
The priority list lives in `src/lib/category-map.ts` — update it if you want
|
||||
a Pelias category to map somewhere else.
|
||||
|
||||
### Critical: the Pelias API patch
|
||||
|
||||
By default, Pelias **hides** the `category` field from API responses unless
|
||||
the caller explicitly passes `?categories=...` — a quirk intended for keyword
|
||||
filtering that also strips category metadata from normal address queries. We
|
||||
work around this by mounting a **patched copy** of
|
||||
`helper/geojsonify_place_details.js` over the upstream one in the `pelias-api`
|
||||
container (`pelias/geojsonify_place_details.js`). The patch changes
|
||||
`condition: checkCategoryParam` → `condition: () => true` so the category
|
||||
array always flows through to the wrapper.
|
||||
|
||||
If you bump the `pelias/api` image, regenerate the patched file:
|
||||
|
||||
```bash
|
||||
cd services/mana-geocoding/pelias
|
||||
docker run --rm pelias/api:latest cat /code/pelias/api/helper/geojsonify_place_details.js \
|
||||
| sed 's|condition: checkCategoryParam|condition: () => true|' \
|
||||
> geojsonify_place_details.js
|
||||
docker compose up -d --force-recreate api
|
||||
```
|
||||
|
||||
## Configuration
|
||||
|
||||
```env
|
||||
PORT=3018
|
||||
|
||||
# --- Provider chain (tried in order) ----------------------------------
|
||||
# Default order: photon-self,pelias,photon,nominatim
|
||||
# Default order: photon-self,photon,nominatim
|
||||
# `photon-self` is silently dropped if PHOTON_SELF_API_URL is unset.
|
||||
GEOCODING_PROVIDERS=photon-self,pelias,photon,nominatim
|
||||
GEOCODING_PROVIDERS=photon-self,photon,nominatim
|
||||
PROVIDER_TIMEOUT_MS=8000 # per-provider request timeout (cold-start safe)
|
||||
PROVIDER_HEALTH_CACHE_MS=30000 # health-cache TTL — skip dead providers
|
||||
|
||||
|
|
@ -165,13 +149,10 @@ PROVIDER_HEALTH_CACHE_MS=30000 # health-cache TTL — skip dead providers
|
|||
# Set in .env.macmini; flow into the container via docker-compose env.
|
||||
PHOTON_SELF_API_URL=http://192.168.178.11:2322
|
||||
|
||||
# --- Pelias (legacy, currently stopped — privacy: 'local') ------------
|
||||
PELIAS_API_URL=http://pelias-api:4000/v1
|
||||
|
||||
# --- Public Photon (privacy: 'public', first public fallback) ---------
|
||||
PHOTON_API_URL=https://photon.komoot.io
|
||||
|
||||
# --- Nominatim (fallback 2) -------------------------------------------
|
||||
# --- Nominatim (last-resort fallback) ---------------------------------
|
||||
NOMINATIM_API_URL=https://nominatim.openstreetmap.org
|
||||
NOMINATIM_USER_AGENT=mana-geocoding/1.0 (+https://mana.how; kontakt@memoro.ai)
|
||||
NOMINATIM_INTERVAL_MS=1100 # >= 1000 to honor 1 req/sec policy
|
||||
|
|
@ -180,7 +161,9 @@ NOMINATIM_INTERVAL_MS=1100 # >= 1000 to honor 1 req/sec policy
|
|||
CORS_ORIGINS=http://localhost:5173,https://mana.how
|
||||
CACHE_MAX_ENTRIES=5000
|
||||
CACHE_TTL_MS=86400000 # 24h — used for local-provider answers
|
||||
CACHE_PUBLIC_TTL_MS=604800000 # 7d — extended TTL for public-API answers (privacy)
|
||||
CACHE_PUBLIC_TTL_MS=3600000 # 1h — short TTL for public-API answers so a
|
||||
# transient photon-self blip doesn't pin
|
||||
# stale fallback answers in cache for days.
|
||||
```
|
||||
|
||||
To **disable a provider**, drop it from `GEOCODING_PROVIDERS`. To run with
|
||||
|
|
@ -195,9 +178,7 @@ The dual-Photon split:
|
|||
fallback for non-sensitive queries when self-hosted is down.
|
||||
|
||||
Both share the same `PhotonProvider` class — only the URL, name, and
|
||||
privacy stance differ. See the [migration runbook](../../docs/runbooks/photon-on-mana-gpu.md)
|
||||
and [decision report](../../docs/reports/geocoding-self-hosting-2026-04-28.md)
|
||||
for the operational story.
|
||||
privacy stance differ.
|
||||
|
||||
## Provider-chain semantics
|
||||
|
||||
|
|
@ -213,40 +194,44 @@ skips it for the rest of the cache window. The next request after the cache
|
|||
expires re-probes lazily — there is no background health pinger.
|
||||
|
||||
```
|
||||
Client (Places module)
|
||||
Client (Places module, etc.)
|
||||
→ mana-geocoding (Hono, port 3018)
|
||||
→ LRU cache (24h TTL) ← hit: ~0 ms
|
||||
→ LRU cache (24h local / 1h public) ← hit: ~0 ms
|
||||
→ Provider chain
|
||||
1. Pelias ← reachable: 50–200 ms (DACH index, fully featured)
|
||||
2. Photon ← fallback: 200–500 ms public, partial features
|
||||
3. Nominatim ← last resort: 200–800 ms + 1 req/sec queue
|
||||
1. photon-self ← reachable: 50–200 ms (cross-LAN to mana-gpu)
|
||||
2. photon ← public fallback: 200–500 ms
|
||||
3. nominatim ← last resort: 200–800 ms + 1 req/sec queue
|
||||
```
|
||||
|
||||
The response body includes `provider: 'pelias' | 'photon' | 'nominatim'`,
|
||||
`tried: ProviderName[]`, and an optional `notice` (`'fallback_used'` or
|
||||
`'sensitive_local_unavailable'`) so the caller can render an
|
||||
"approximate match" hint or explain why a sensitive query returned 0
|
||||
results.
|
||||
### Why the public TTL is short (1h)
|
||||
|
||||
When photon-self has a transient cross-LAN blip and a request falls
|
||||
through to public photon, the public answer used to be cached for 7 days
|
||||
— pinning the cached fallback even after photon-self recovered. With
|
||||
the 1h TTL the chain returns to photon-self within an hour. The privacy
|
||||
benefit of long TTLs (fewer outbound queries) is moot now that
|
||||
photon-self serves the bulk of traffic; only fallback answers go through
|
||||
public providers.
|
||||
|
||||
## Privacy hardening
|
||||
|
||||
When a request goes to Pelias, the user's query content + focus point
|
||||
stay on our infrastructure. When it falls through to Photon or
|
||||
Nominatim, the query is forwarded to a third party. Three independent
|
||||
defenses limit what those third parties can learn:
|
||||
When a request goes to `photon-self`, the user's query content + focus
|
||||
point stay on our infrastructure. When it falls through to public
|
||||
Photon or Nominatim, the query is forwarded to a third party. Three
|
||||
independent defenses limit what those third parties can learn:
|
||||
|
||||
### 1. Sensitive-query block (`src/lib/sensitive-query.ts`)
|
||||
|
||||
Queries matching the medical / mental-health / crisis-service keyword
|
||||
list (`Hausarzt`, `Psychiater`, `Klinikum`, `Suchtberatung`, `HIV`,
|
||||
`Frauenhaus`, …) are **never forwarded to public APIs**, even if Pelias
|
||||
is unreachable. The chain detects sensitivity at the route layer and
|
||||
calls `chain.search(req, signal, { localOnly: true })` — providers with
|
||||
`privacy: 'public'` are filtered out *before* the iteration begins, so
|
||||
there is no race window.
|
||||
`Frauenhaus`, …) are **never forwarded to public APIs**, even if
|
||||
photon-self is unreachable. The chain detects sensitivity at the route
|
||||
layer and calls `chain.search(req, signal, { localOnly: true })` —
|
||||
providers with `privacy: 'public'` are filtered out *before* the
|
||||
iteration begins, so there is no race window.
|
||||
|
||||
When no local provider is available (e.g. Pelias is stopped), a
|
||||
sensitive query returns `ok: true, results: [], notice:
|
||||
When no local provider is available (e.g. `PHOTON_SELF_API_URL` is
|
||||
unset), a sensitive query returns `ok: true, results: [], notice:
|
||||
'sensitive_local_unavailable'`. The UI should show "Diese Suche bleibt
|
||||
bewusst lokal — kein Treffer im DACH-Index. Versuche eine allgemeinere
|
||||
Formulierung." rather than "no results".
|
||||
|
|
@ -267,17 +252,18 @@ Coordinates are rounded before forwarding to public providers:
|
|||
City-block resolution — sufficient for "what's near me?", avoids
|
||||
logging exact home/workplace coordinates to a third party.
|
||||
|
||||
Pelias always gets full-precision coordinates — quantization only
|
||||
applies on the way out to public APIs.
|
||||
`photon-self` always gets full-precision coordinates — quantization
|
||||
only applies on the way out to public APIs.
|
||||
|
||||
### 3. Aggressive caching of public-API answers
|
||||
### 3. Caching of public-API answers
|
||||
|
||||
`config.cache.publicTtlMs` (default 7 days) overrides the default 24h
|
||||
cache TTL when the response came from a public provider. Same query
|
||||
from 1000 different users → 1 outbound request to Photon/Nominatim.
|
||||
This is the strongest privacy lever we have over public providers,
|
||||
since we can't change their logging behavior — only the rate at which
|
||||
we feed them queries.
|
||||
`config.cache.publicTtlMs` (default 1h) overrides the default 24h cache
|
||||
TTL when the response came from a public provider. Same query from
|
||||
multiple users within an hour → 1 outbound request to Photon/Nominatim.
|
||||
The TTL is short by design (see "Why the public TTL is short" above) —
|
||||
the strong caching lever was an artifact of the era when public Photon
|
||||
was THE fallback for a stopped Pelias; today it's a last-resort fallback
|
||||
behind a healthy photon-self.
|
||||
|
||||
### What this protects + what it doesn't
|
||||
|
||||
|
|
@ -286,8 +272,8 @@ we feed them queries.
|
|||
| Public API sees user's IP | ✓ (wrapper is the proxy, only mac-mini IP goes out) |
|
||||
| Public API sees user identity / JWT | ✓ (wrapper sends no auth headers) |
|
||||
| Public API sees query content | partial — sensitive queries blocked entirely, others go through |
|
||||
| Public API sees user's exact GPS | ✓ (quantized to ~1km / ~110m) |
|
||||
| Aggregate location-intent profiling | partial — cache reduces volume ~10–100× |
|
||||
| Public API sees user's exact GPS | ✓ (quantized to ~1 km / ~110 m) |
|
||||
| Aggregate location-intent profiling | partial — cache reduces volume modestly |
|
||||
| TLS-level traffic analysis (timing) | ✗ (not in scope) |
|
||||
| Compelled disclosure of public-API logs | ✗ (no legal mitigation) |
|
||||
|
||||
|
|
@ -295,89 +281,42 @@ Residual risk for non-sensitive queries: "third party learns what
|
|||
queries our backend made, with timestamps, but not who made them."
|
||||
Acceptable for restaurant/landmark lookups, blocked for medical lookups.
|
||||
|
||||
## Pelias Infrastructure
|
||||
## photon-self infrastructure
|
||||
|
||||
The Pelias stack runs as a separate docker-compose in `pelias/`:
|
||||
Photon runs on **mana-gpu** (Windows 11 + WSL2 + Docker), as a Java JAR
|
||||
inside `eclipse-temurin:21-jre` with the unpacked Photon-Europe data
|
||||
directory (~80 GB) mounted in. Cross-LAN reachable from the Mac mini via
|
||||
WSL2 mirrored networking on `192.168.178.11:2322`.
|
||||
|
||||
- **elasticsearch** — Index storage (Docker volume, ~5GB for DACH after
|
||||
indexing 13.4M OSM objects — 10M addresses + 3.3M venues)
|
||||
- **api** — HTTP API (port 4000), patched for category passthrough
|
||||
- **libpostal** — Address parsing (internal only, not exposed on host port
|
||||
because 4400 collides with mana-infra-landings on the Mac Mini)
|
||||
- **Import containers** — Run once for initial data load, then stopped
|
||||
Operator scripts for the weekly DB refresh live in
|
||||
`services/mana-geocoding/photon-self/`:
|
||||
|
||||
**Production RAM usage** (measured on the Mac Mini after the 2026-04-11 deploy):
|
||||
| File | Purpose |
|
||||
|------|---------|
|
||||
| `photon-update.sh` | Atomic-swap update script — downloads new tarball, unpacks, restarts the container, rolls back on failure. Installed on mana-gpu at `/usr/local/bin/photon-update.sh`. |
|
||||
| `photon-update.service` | systemd oneshot unit that runs `photon-update.sh`. |
|
||||
| `photon-update.timer` | systemd timer (Sun 03:30 + 30min jitter, `Persistent=true`). |
|
||||
| `README.md` | Re-installation steps for DR scenarios + manual test commands. |
|
||||
|
||||
| Container | RAM |
|
||||
|---|---|
|
||||
| pelias-elasticsearch | ~1.2 GB |
|
||||
| pelias-libpostal | ~1.9 GB (address parser model) |
|
||||
| pelias-api | ~100 MB |
|
||||
| mana-geocoding (wrapper) | ~20–60 MB |
|
||||
|
||||
Total: **~3.2 GB** — larger than the initial ~1.5 GB estimate because
|
||||
libpostal loads its full address parser into memory up front.
|
||||
|
||||
### Initial import (one-time)
|
||||
|
||||
The DACH PBF extract is ~5GB and takes 30-45 minutes to index. See
|
||||
`pelias/setup.sh` for the full pipeline. Key steps, in order:
|
||||
|
||||
1. `docker compose up -d` — bring up ES, api, libpostal
|
||||
2. `docker exec pelias-elasticsearch elasticsearch-plugin install analysis-icu`
|
||||
then restart — the official ES image doesn't ship `analysis-icu` which
|
||||
Pelias' schema mapping requires
|
||||
3. `docker compose --profile import run --rm schema ./bin/create_index`
|
||||
4. `docker compose --profile import run --rm openstreetmap ./bin/download`
|
||||
(downloads `dach-latest.osm.pbf` from Geofabrik, ~5GB)
|
||||
5. **Rename** `dach-latest.osm.pbf` → `planet-latest.osm.pbf` inside the
|
||||
pelias-data volume (Pelias' importer expects that filename). The
|
||||
`pelias.json` config references it as `planet-latest.osm.pbf` too.
|
||||
6. `docker compose --profile import run --rm openstreetmap ./bin/start`
|
||||
(22M objects, ~30 min on an M2 Mac mini)
|
||||
|
||||
### pelias.json gotchas
|
||||
|
||||
A few non-obvious settings required for a self-hosted DACH deployment:
|
||||
|
||||
- **`adminLookup.enabled: false`** — Pelias tries to resolve country/region
|
||||
hierarchies via "Who's On First" data by default. We don't import WOF,
|
||||
so this must be disabled or import crashes with `unable to locate sqlite
|
||||
folder`.
|
||||
- **`leveldbpath: "/data/leveldb"`** — not `/tmp/leveldb`; the container
|
||||
user (1001) needs write access and `/tmp` is not mounted.
|
||||
- **`api.services.libpostal: { url: "..." }`** — must be an object, not a
|
||||
string. The API's Joi schema rejects the string form.
|
||||
- **Only declare services you actually run.** We used to list `placeholder`,
|
||||
`pip`, and `interpolation` in `api.services` but never ran the containers;
|
||||
Pelias logged `ENOTFOUND` errors on every query. Dropping the unused
|
||||
entries makes Pelias degrade cleanly to libpostal-only parsing (warns
|
||||
`service disabled` once at startup, then silent).
|
||||
- **No `defaultParameters.boundary.country`** — Pelias only accepts a
|
||||
single country value for `boundary.country`. Since our index only
|
||||
contains DACH data anyway, we drop the filter entirely.
|
||||
- **`features: { filename: "planet-latest.osm.pbf" }`** — required because
|
||||
Geofabrik downloads come named `dach-latest.osm.pbf`, but Pelias'
|
||||
openstreetmap importer looks for `planet-latest.osm.pbf` by default.
|
||||
The migration log + 5 WSL2 gotchas are documented in
|
||||
[`docs/reports/geocoding-self-hosting-2026-04-28.md`](../../docs/reports/geocoding-self-hosting-2026-04-28.md).
|
||||
|
||||
### Wrapper gotchas
|
||||
|
||||
- **`idleTimeout: 60`** on `Bun.serve` — the default 10 s cuts off cold
|
||||
queries that hit Elasticsearch and libpostal in sequence. 60 s is
|
||||
generous for the worst case while still catching actually-stuck
|
||||
connections.
|
||||
- **Colima bind-mount cache.** The mac-mini bind-mounts this repo's files
|
||||
into several monitoring containers. Colima on macOS sometimes serves a
|
||||
stale view of a bind-mounted file even after the file on disk changes.
|
||||
After editing `scripts/generate-status-page.sh` (also bind-mounted into
|
||||
`mana-status-gen`), restart the consuming container so it sees the
|
||||
fresh content: `docker restart mana-status-gen`.
|
||||
- **`host.docker.internal` doesn't resolve from blackbox-exporter** on
|
||||
Colima, so the external monitoring can't probe pelias-api or
|
||||
elasticsearch directly. Instead, the wrapper exposes `/health/pelias`
|
||||
which proxies a request to Pelias; Prometheus probes that internal
|
||||
endpoint inside the docker network. See `prometheus.yml` job
|
||||
`blackbox-internal`.
|
||||
cross-LAN queries to photon-self where OpenSearch needs to recover
|
||||
shards. 60 s is generous for the worst case while still catching
|
||||
actually-stuck connections.
|
||||
- **Cross-LAN reach is occasionally flaky.** A photon-self request
|
||||
sometimes hangs for the full `PROVIDER_TIMEOUT_MS` (8 s default), which
|
||||
marks the provider unhealthy for 30 s. During that window, requests
|
||||
fall through to public photon. With `CACHE_PUBLIC_TTL_MS=3600000` (1h),
|
||||
the cached public answers expire fast enough that the chain returns to
|
||||
photon-self once it's healthy again.
|
||||
- **`host.docker.internal` is no longer needed.** The Pelias era used
|
||||
`extra_hosts: host.docker.internal:host-gateway` to reach Pelias on
|
||||
the host network. photon-self is reached over LAN by IP, so the
|
||||
docker-compose entry no longer carries `extra_hosts`.
|
||||
|
||||
## Testing
|
||||
|
||||
|
|
@ -392,28 +331,27 @@ cd services/mana-geocoding
|
|||
bun test
|
||||
```
|
||||
|
||||
- `src/lib/__tests__/category-map.test.ts` — Pelias→PlaceCategory
|
||||
priority resolution.
|
||||
- `src/lib/__tests__/osm-category-map.test.ts` — raw OSM-tag→PlaceCategory
|
||||
mapping used by Photon + Nominatim (since they emit `class:type` rather
|
||||
than Pelias's curated taxonomy).
|
||||
- `src/lib/__tests__/osm-category-map.test.ts` — raw OSM-tag →
|
||||
PlaceCategory mapping (used by Photon + Nominatim).
|
||||
- `src/lib/__tests__/cache.test.ts` — LRU eviction order, TTL expiry,
|
||||
move-to-end on `get`, size tracking.
|
||||
- `src/lib/__tests__/rate-limiter.test.ts` — single-token rate limiter
|
||||
(used to enforce Nominatim's 1 req/sec policy). FIFO order, abort
|
||||
cleanup, busy-flag release on aborted interval-wait.
|
||||
- `src/providers/__tests__/chain.test.ts` — provider chain failover, health
|
||||
cache, "stop on empty results" semantics.
|
||||
- `src/lib/__tests__/privacy.test.ts` — coordinate quantization edge
|
||||
cases.
|
||||
- `src/lib/__tests__/sensitive-query.test.ts` — keyword-list coverage.
|
||||
- `src/providers/__tests__/chain.test.ts` — provider chain failover,
|
||||
health cache, "stop on empty results" semantics, localOnly mode.
|
||||
- `src/providers/__tests__/photon-normalizer.test.ts` and
|
||||
`nominatim-normalizer.test.ts` — locking the wire-format mapping for the
|
||||
two public fallback providers.
|
||||
|
||||
As of the 2026-04-28 privacy-hardening rollout: **141 tests, all green**.
|
||||
`nominatim-normalizer.test.ts` — wire-format mapping for the two
|
||||
public providers.
|
||||
- `src/__tests__/app.test.ts` — `createChain()` registration tests
|
||||
(photon-self opt-in via env-var, chain order honored).
|
||||
|
||||
### Smoke test (`bun run test:smoke`)
|
||||
|
||||
End-to-end curls against a running service. Requires a fully deployed
|
||||
Pelias stack with the DACH index loaded — run this after a deploy to
|
||||
End-to-end curls against a running service. Run after a deploy to
|
||||
confirm the full pipeline is healthy.
|
||||
|
||||
```bash
|
||||
|
|
@ -422,9 +360,9 @@ bun run test:smoke # default http://localhost:3
|
|||
./scripts/smoke-test.sh http://mana-geocoding:3018 # from another container
|
||||
```
|
||||
|
||||
Asserts: wrapper + pelias health, restaurant→food, station→transit,
|
||||
street+locality fallback returns results, focus biasing works, reverse
|
||||
geocoding for Konstanz and München, cache hit on repeat. 9 checks.
|
||||
Asserts: wrapper + photon-self health, restaurant→food category,
|
||||
station→transit, street/locality fallback, focus biasing, reverse
|
||||
geocoding for Konstanz and München, cache hit on repeat.
|
||||
|
||||
## Code Layout
|
||||
|
||||
|
|
@ -435,22 +373,22 @@ src/
|
|||
├── config.ts # Environment config (incl. provider list)
|
||||
├── routes/
|
||||
│ ├── geocode.ts # Forward + reverse, delegates to chain
|
||||
│ └── health.ts # /health, /health/pelias, /health/providers
|
||||
│ └── health.ts # /health, /health/photon-self, /health/providers
|
||||
├── providers/
|
||||
│ ├── types.ts # GeocodingProvider interface, shared shape
|
||||
│ ├── chain.ts # Failover orchestrator + health cache
|
||||
│ ├── pelias.ts # Primary: self-hosted DACH Pelias
|
||||
│ ├── photon.ts # Fallback 1: photon.komoot.io
|
||||
│ └── nominatim.ts # Fallback 2: nominatim.openstreetmap.org
|
||||
│ ├── photon.ts # photon-self + public photon (same class, two configs)
|
||||
│ └── nominatim.ts # Public nominatim.openstreetmap.org
|
||||
└── lib/
|
||||
├── cache.ts # LRU cache with TTL + per-entry override
|
||||
├── category-map.ts # Pelias-taxonomy → PlaceCategory
|
||||
├── category-map.ts # PlaceCategory type definition
|
||||
├── osm-category-map.ts # Raw OSM `class:type` → PlaceCategory
|
||||
├── privacy.ts # Coordinate quantization for public APIs
|
||||
├── rate-limiter.ts # Single-token limiter (used by Nominatim)
|
||||
└── sensitive-query.ts # Health/crisis keyword detector
|
||||
pelias/
|
||||
├── docker-compose.yml # Pelias stack
|
||||
├── pelias.json # Pelias config (DACH region)
|
||||
└── setup.sh # Initial data import script
|
||||
photon-self/ # Operator scripts for the mana-gpu Photon
|
||||
├── photon-update.sh # Atomic-swap weekly update (deployed to mana-gpu)
|
||||
├── photon-update.service # systemd oneshot unit
|
||||
├── photon-update.timer # systemd weekly timer
|
||||
└── README.md # Re-install steps for DR
|
||||
```
|
||||
|
|
|
|||
|
|
@ -1,121 +0,0 @@
|
|||
# Pelias geocoding stack for mana-geocoding.
|
||||
#
|
||||
# Data pipeline: download → prepare → import → serve.
|
||||
# See pelias/setup.sh for the initial import pipeline.
|
||||
#
|
||||
# After import, `api`, `libpostal`, and `elasticsearch` must stay running
|
||||
# to serve queries. The import containers (schema, openstreetmap,
|
||||
# polylines) run only under the `import` profile and can be stopped
|
||||
# afterward if RAM is tight.
|
||||
|
||||
services:
|
||||
# --- Always running ---
|
||||
|
||||
api:
|
||||
image: pelias/api:latest
|
||||
container_name: pelias-api
|
||||
restart: unless-stopped
|
||||
ports:
|
||||
- "4000:4000"
|
||||
environment:
|
||||
PORT: 4000
|
||||
volumes:
|
||||
- ./pelias.json:/code/pelias.json:ro
|
||||
# Patch: always return the `category` field in API responses, not only
|
||||
# when a `categories=...` filter is present. Pelias' default
|
||||
# `checkCategoryParam` hides category from results unless the caller
|
||||
# filters by it, but we want the OSM taxonomy (food, retail, transport, …)
|
||||
# on every venue so our Places UI can auto-map it to a PlaceCategory.
|
||||
# The patched file is generated from the upstream one with
|
||||
# `sed "s|condition: checkCategoryParam|condition: () => true|"`
|
||||
- ./geojsonify_place_details.js:/code/pelias/api/helper/geojsonify_place_details.js:ro
|
||||
depends_on:
|
||||
elasticsearch:
|
||||
condition: service_healthy
|
||||
networks:
|
||||
- pelias
|
||||
|
||||
libpostal:
|
||||
image: pelias/libpostal-service
|
||||
container_name: pelias-libpostal
|
||||
restart: unless-stopped
|
||||
# No host port mapping — libpostal is an internal dependency of
|
||||
# pelias-api, reached over the pelias network at libpostal:4400.
|
||||
# Port 4400 on the host is used by mana-infra-landings (nginx for
|
||||
# status.mana.how) on the production mac mini.
|
||||
expose:
|
||||
- "4400"
|
||||
networks:
|
||||
- pelias
|
||||
|
||||
elasticsearch:
|
||||
image: docker.elastic.co/elasticsearch/elasticsearch:7.17.1
|
||||
container_name: pelias-elasticsearch
|
||||
restart: unless-stopped
|
||||
ports:
|
||||
- "9200:9200"
|
||||
volumes:
|
||||
- pelias-elasticsearch:/usr/share/elasticsearch/data
|
||||
environment:
|
||||
ES_JAVA_OPTS: "-Xms512m -Xmx512m"
|
||||
discovery.type: single-node
|
||||
xpack.security.enabled: "false"
|
||||
ulimits:
|
||||
memlock:
|
||||
soft: -1
|
||||
hard: -1
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://localhost:9200/_cluster/health"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 30
|
||||
networks:
|
||||
- pelias
|
||||
|
||||
# --- Import pipeline (run once, then stop) ---
|
||||
|
||||
schema:
|
||||
image: pelias/schema:latest
|
||||
container_name: pelias-schema
|
||||
volumes:
|
||||
- ./pelias.json:/code/pelias.json:ro
|
||||
depends_on:
|
||||
elasticsearch:
|
||||
condition: service_healthy
|
||||
networks:
|
||||
- pelias
|
||||
profiles: ["import"]
|
||||
|
||||
openstreetmap:
|
||||
image: pelias/openstreetmap:latest
|
||||
container_name: pelias-openstreetmap
|
||||
volumes:
|
||||
- ./pelias.json:/code/pelias.json:ro
|
||||
- pelias-data:/data
|
||||
depends_on:
|
||||
elasticsearch:
|
||||
condition: service_healthy
|
||||
networks:
|
||||
- pelias
|
||||
profiles: ["import"]
|
||||
|
||||
polylines:
|
||||
image: pelias/polylines:latest
|
||||
container_name: pelias-polylines
|
||||
volumes:
|
||||
- ./pelias.json:/code/pelias.json:ro
|
||||
- pelias-data:/data
|
||||
depends_on:
|
||||
elasticsearch:
|
||||
condition: service_healthy
|
||||
networks:
|
||||
- pelias
|
||||
profiles: ["import"]
|
||||
|
||||
volumes:
|
||||
pelias-elasticsearch:
|
||||
pelias-data:
|
||||
|
||||
networks:
|
||||
pelias:
|
||||
driver: bridge
|
||||
|
|
@ -1,123 +0,0 @@
|
|||
const _ = require('lodash');
|
||||
const field = require('./fieldValue');
|
||||
|
||||
// Properties to be copied
|
||||
// If a property is identified as a single string, assume it should be presented as a string in response
|
||||
// If something other than string is desired, use the following structure: { name: 'category', type: 'array' }
|
||||
const DETAILS_PROPS = [
|
||||
{ name: 'unit', type: 'string' },
|
||||
{ name: 'housenumber', type: 'string' },
|
||||
{ name: 'street', type: 'string' },
|
||||
{ name: 'postalcode', type: 'string' },
|
||||
{ name: 'postalcode_gid', type: 'string' },
|
||||
{ name: 'confidence', type: 'default' },
|
||||
{ name: 'match_type', type: 'string' },
|
||||
{ name: 'distance', type: 'default' },
|
||||
{ name: 'accuracy', type: 'string' },
|
||||
{ name: 'country', type: 'string' },
|
||||
{ name: 'country_gid', type: 'string' },
|
||||
{ name: 'country_a', type: 'string' },
|
||||
{ name: 'dependency', type: 'string' },
|
||||
{ name: 'dependency_gid', type: 'string' },
|
||||
{ name: 'dependency_a', type: 'string' },
|
||||
{ name: 'macroregion', type: 'string' },
|
||||
{ name: 'macroregion_gid', type: 'string' },
|
||||
{ name: 'macroregion_a', type: 'string' },
|
||||
{ name: 'region', type: 'string' },
|
||||
{ name: 'region_gid', type: 'string' },
|
||||
{ name: 'region_a', type: 'string' },
|
||||
{ name: 'macrocounty', type: 'string' },
|
||||
{ name: 'macrocounty_gid', type: 'string' },
|
||||
{ name: 'macrocounty_a', type: 'string' },
|
||||
{ name: 'county', type: 'string' },
|
||||
{ name: 'county_gid', type: 'string' },
|
||||
{ name: 'county_a', type: 'string' },
|
||||
{ name: 'localadmin', type: 'string' },
|
||||
{ name: 'localadmin_gid', type: 'string' },
|
||||
{ name: 'localadmin_a', type: 'string' },
|
||||
{ name: 'locality', type: 'string' },
|
||||
{ name: 'locality_gid', type: 'string' },
|
||||
{ name: 'locality_a', type: 'string' },
|
||||
{ name: 'borough', type: 'string' },
|
||||
{ name: 'borough_gid', type: 'string' },
|
||||
{ name: 'borough_a', type: 'string' },
|
||||
{ name: 'neighbourhood', type: 'string' },
|
||||
{ name: 'neighbourhood_gid', type: 'string' },
|
||||
{ name: 'continent', type: 'string' },
|
||||
{ name: 'continent_gid', type: 'string' },
|
||||
{ name: 'continent_a', type: 'string' },
|
||||
{ name: 'empire', type: 'string', condition: _.negate(hasCountry) },
|
||||
{ name: 'empire_gid', type: 'string', condition: _.negate(hasCountry) },
|
||||
{ name: 'empire_a', type: 'string', condition: _.negate(hasCountry) },
|
||||
{ name: 'ocean', type: 'string' },
|
||||
{ name: 'ocean_gid', type: 'string' },
|
||||
{ name: 'ocean_a', type: 'string' },
|
||||
{ name: 'marinearea', type: 'string' },
|
||||
{ name: 'marinearea_gid', type: 'string' },
|
||||
{ name: 'marinearea_a', type: 'string' },
|
||||
{ name: 'bounding_box', type: 'default' },
|
||||
{ name: 'label', type: 'string' },
|
||||
{ name: 'category', type: 'array', condition: () => true },
|
||||
];
|
||||
|
||||
const EXTENDED_PROPS = DETAILS_PROPS.concat([
|
||||
{ name: 'population', type: 'default' },
|
||||
{ name: 'popularity', type: 'default' },
|
||||
]);
|
||||
|
||||
// returns true IFF source has a country_gid property
|
||||
function hasCountry(params, source) {
|
||||
return source.hasOwnProperty('country_gid');
|
||||
}
|
||||
|
||||
function checkCategoryParam(params) {
|
||||
return _.isObject(params) && params.hasOwnProperty('categories');
|
||||
}
|
||||
|
||||
/**
|
||||
* Collect the specified properties from source into an object and return it
|
||||
* Ignore missing properties.
|
||||
*
|
||||
* @param {object} params clean query params
|
||||
* @param {object} source
|
||||
* @returns {object} the collected properties
|
||||
*/
|
||||
function collectProperties(params, source) {
|
||||
let props = DETAILS_PROPS;
|
||||
|
||||
// extended properties when debugging mode is enabled
|
||||
if (params.enableDebug === true) {
|
||||
props = EXTENDED_PROPS;
|
||||
}
|
||||
|
||||
return props.reduce((result, prop) => {
|
||||
// if condition isn't met, don't set the property
|
||||
if (_.isFunction(prop.condition) && !prop.condition(params, source)) {
|
||||
return result;
|
||||
}
|
||||
|
||||
if (source.hasOwnProperty(prop.name)) {
|
||||
let value = null;
|
||||
|
||||
switch (prop.type) {
|
||||
case 'string':
|
||||
value = field.getStringValue(source[prop.name]);
|
||||
break;
|
||||
case 'array':
|
||||
value = field.getArrayValue(source[prop.name]);
|
||||
break;
|
||||
// default behavior is to copy property exactly as is
|
||||
default:
|
||||
value = source[prop.name];
|
||||
}
|
||||
|
||||
if (_.isNumber(value) || (value && !_.isEmpty(value))) {
|
||||
result[prop.name] = value;
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}, {});
|
||||
}
|
||||
|
||||
module.exports = collectProperties;
|
||||
|
|
@ -1,52 +0,0 @@
|
|||
{
|
||||
"esclient": {
|
||||
"apiVersion": "7.x",
|
||||
"hosts": [
|
||||
{
|
||||
"host": "elasticsearch",
|
||||
"port": 9200
|
||||
}
|
||||
]
|
||||
},
|
||||
"api": {
|
||||
"services": {
|
||||
"libpostal": {
|
||||
"url": "http://libpostal:4400"
|
||||
}
|
||||
},
|
||||
"defaultParameters": {}
|
||||
},
|
||||
"imports": {
|
||||
"adminLookup": {
|
||||
"enabled": false
|
||||
},
|
||||
"openstreetmap": {
|
||||
"download": [
|
||||
{
|
||||
"sourceURL": "https://download.geofabrik.de/europe/dach-latest.osm.pbf"
|
||||
}
|
||||
],
|
||||
"datapath": "/data/openstreetmap",
|
||||
"leveldbpath": "/data/leveldb",
|
||||
"importVenues": true,
|
||||
"importAddresses": true,
|
||||
"adminLookup": false,
|
||||
"useAdminHierarchyLabels": false,
|
||||
"leveldb": {
|
||||
"cacheSize": 256
|
||||
},
|
||||
"import": [
|
||||
{
|
||||
"filename": "planet-latest.osm.pbf"
|
||||
}
|
||||
]
|
||||
},
|
||||
"polylines": {
|
||||
"datapath": "/data/polylines",
|
||||
"files": ["extract.0sv"]
|
||||
}
|
||||
},
|
||||
"logger": {
|
||||
"level": "info"
|
||||
}
|
||||
}
|
||||
|
|
@ -1,35 +0,0 @@
|
|||
#!/bin/bash
|
||||
# Initial Pelias data import for DACH region.
|
||||
#
|
||||
# Run this ONCE after first docker compose up.
|
||||
# Takes 30-60 minutes depending on hardware.
|
||||
#
|
||||
# After import, the "import" profile containers can be stopped.
|
||||
|
||||
set -euo pipefail
|
||||
cd "$(dirname "$0")"
|
||||
|
||||
echo "=== Step 1: Create Elasticsearch schema ==="
|
||||
docker compose --profile import run --rm schema ./bin/create_index
|
||||
|
||||
echo "=== Step 2: Download DACH OSM data ==="
|
||||
mkdir -p data/openstreetmap
|
||||
docker compose --profile import run --rm openstreetmap ./bin/download
|
||||
|
||||
echo "=== Step 3: Import OpenStreetMap data ==="
|
||||
docker compose --profile import run --rm openstreetmap ./bin/start
|
||||
|
||||
echo "=== Step 4: Import polylines (street data) ==="
|
||||
docker compose --profile import run --rm polylines ./bin/download
|
||||
docker compose --profile import run --rm polylines ./bin/start
|
||||
|
||||
echo ""
|
||||
echo "=== Import complete! ==="
|
||||
echo "Pelias API is available at http://localhost:4000/v1"
|
||||
echo ""
|
||||
echo "Test it:"
|
||||
echo " curl 'http://localhost:4000/v1/search?text=Münsterplatz+Konstanz'"
|
||||
echo " curl 'http://localhost:4000/v1/reverse?point.lat=47.663&point.lon=9.175'"
|
||||
echo ""
|
||||
echo "You can now stop the import containers:"
|
||||
echo " docker compose --profile import stop"
|
||||
|
|
@ -61,7 +61,7 @@ urlenc() {
|
|||
|
||||
echo "--- Health ---"
|
||||
check "wrapper health" "$BASE/health" '.status' 'ok'
|
||||
check "pelias health proxy" "$BASE/health/pelias" '.status' 'ok'
|
||||
check "photon-self health proxy" "$BASE/health/photon-self" '.status' 'ok'
|
||||
echo
|
||||
|
||||
# --- 2. Forward geocoding ---
|
||||
|
|
|
|||
|
|
@ -14,7 +14,6 @@ import type { Config } from '../config';
|
|||
function baseConfig(overrides: Partial<Config> = {}): Config {
|
||||
return {
|
||||
port: 3018,
|
||||
pelias: { apiUrl: 'http://127.0.0.1:1' },
|
||||
photon: { apiUrl: 'https://photon.komoot.io' },
|
||||
photonSelf: { apiUrl: undefined },
|
||||
nominatim: {
|
||||
|
|
@ -25,7 +24,7 @@ function baseConfig(overrides: Partial<Config> = {}): Config {
|
|||
cors: { origins: [] },
|
||||
cache: { maxEntries: 100, ttlMs: 1000, publicTtlMs: 7000 },
|
||||
providers: {
|
||||
enabled: ['photon-self', 'pelias', 'photon', 'nominatim'],
|
||||
enabled: ['photon-self', 'photon', 'nominatim'],
|
||||
healthCacheMs: 30_000,
|
||||
timeoutMs: 8000,
|
||||
},
|
||||
|
|
|
|||
|
|
@ -9,7 +9,6 @@ import type { Config } from './config';
|
|||
import { RateLimiter } from './lib/rate-limiter';
|
||||
import { ProviderChain } from './providers/chain';
|
||||
import { NominatimProvider } from './providers/nominatim';
|
||||
import { PeliasProvider } from './providers/pelias';
|
||||
import { PhotonProvider } from './providers/photon';
|
||||
import type { GeocodingProvider, ProviderName } from './providers/types';
|
||||
import { createGeocodeRoutes } from './routes/geocode';
|
||||
|
|
@ -47,18 +46,10 @@ export function createApp(config: Config): Hono {
|
|||
export function createChain(config: Config): ProviderChain {
|
||||
const built = new Map<ProviderName, GeocodingProvider>();
|
||||
|
||||
built.set(
|
||||
'pelias',
|
||||
new PeliasProvider({
|
||||
apiUrl: config.pelias.apiUrl,
|
||||
timeoutMs: config.providers.timeoutMs,
|
||||
})
|
||||
);
|
||||
|
||||
// Self-hosted Photon (mana-gpu). Only registered when the env-var is set
|
||||
// — pre-migration this stays absent and the chain falls through to
|
||||
// public providers as before. Once the GPU server is running Photon,
|
||||
// flip PHOTON_SELF_API_URL on and this becomes the primary provider.
|
||||
// — without it the chain runs on public providers only. Once the GPU
|
||||
// server is running Photon, flip PHOTON_SELF_API_URL on and this
|
||||
// becomes the primary provider.
|
||||
if (config.photonSelf.apiUrl) {
|
||||
built.set(
|
||||
'photon-self',
|
||||
|
|
|
|||
|
|
@ -6,10 +6,6 @@ import type { ProviderName } from './providers/types';
|
|||
|
||||
export interface Config {
|
||||
port: number;
|
||||
pelias: {
|
||||
/** Pelias API base URL (the API container, not the placeholder service) */
|
||||
apiUrl: string;
|
||||
};
|
||||
photon: {
|
||||
/** Photon base URL — public komoot endpoint by default. Used by
|
||||
* the `'photon'` provider slot which always has `privacy: 'public'`. */
|
||||
|
|
@ -20,7 +16,7 @@ export interface Config {
|
|||
* GPU server). When set, the wrapper registers a separate
|
||||
* `'photon-self'` provider with `privacy: 'local'` — eligible for
|
||||
* sensitive queries. When undefined, the slot is disabled and the
|
||||
* chain only has the public providers (current pre-migration state). */
|
||||
* chain runs on public providers only. */
|
||||
apiUrl: string | undefined;
|
||||
};
|
||||
nominatim: {
|
||||
|
|
@ -37,12 +33,13 @@ export interface Config {
|
|||
/** Max entries in the in-memory LRU cache */
|
||||
maxEntries: number;
|
||||
/** Default TTL in milliseconds (24h — used for results from local
|
||||
* providers like Pelias, where the index can be re-imported) */
|
||||
* providers like photon-self) */
|
||||
ttlMs: number;
|
||||
/** Extended TTL for results that came from public APIs (Photon,
|
||||
* Nominatim). 7 days by default — caching aggressively reduces
|
||||
* the number of times we forward query content to a third party,
|
||||
* which is the main privacy lever we have over public providers. */
|
||||
/** TTL for results that came from public APIs (Photon, Nominatim).
|
||||
* Capped at 1h so a brief blip in photon-self can't pin stale
|
||||
* public-fallback answers in the cache for days. The privacy
|
||||
* benefit of long TTLs (fewer outbound queries) is moot now that
|
||||
* photon-self serves the bulk of traffic. */
|
||||
publicTtlMs: number;
|
||||
};
|
||||
providers: {
|
||||
|
|
@ -60,9 +57,6 @@ export interface Config {
|
|||
export function loadConfig(): Config {
|
||||
return {
|
||||
port: parseInt(process.env.PORT || '3018', 10),
|
||||
pelias: {
|
||||
apiUrl: process.env.PELIAS_API_URL || 'http://localhost:4000/v1',
|
||||
},
|
||||
photon: {
|
||||
apiUrl: process.env.PHOTON_API_URL || 'https://photon.komoot.io',
|
||||
},
|
||||
|
|
@ -86,17 +80,15 @@ export function loadConfig(): Config {
|
|||
cache: {
|
||||
maxEntries: parseInt(process.env.CACHE_MAX_ENTRIES || '5000', 10),
|
||||
ttlMs: parseInt(process.env.CACHE_TTL_MS || String(24 * 60 * 60 * 1000), 10),
|
||||
publicTtlMs: parseInt(process.env.CACHE_PUBLIC_TTL_MS || String(7 * 24 * 60 * 60 * 1000), 10),
|
||||
publicTtlMs: parseInt(process.env.CACHE_PUBLIC_TTL_MS || String(60 * 60 * 1000), 10),
|
||||
},
|
||||
providers: {
|
||||
// Default order (when GEOCODING_PROVIDERS is unset): try the
|
||||
// self-hosted Photon first if it's been configured, then public
|
||||
// providers as fallback. `photon-self` is silently dropped at
|
||||
// chain-build time if `photonSelf.apiUrl` is undefined, so the
|
||||
// list is the same shape regardless of migration status.
|
||||
// chain-build time if `photonSelf.apiUrl` is undefined.
|
||||
enabled: parseProviderList(process.env.GEOCODING_PROVIDERS, [
|
||||
'photon-self',
|
||||
'pelias',
|
||||
'photon',
|
||||
'nominatim',
|
||||
]),
|
||||
|
|
@ -112,7 +104,7 @@ export function loadConfig(): Config {
|
|||
|
||||
function parseProviderList(raw: string | undefined, fallback: ProviderName[]): ProviderName[] {
|
||||
if (!raw) return fallback;
|
||||
const valid: ProviderName[] = ['pelias', 'photon-self', 'photon', 'nominatim'];
|
||||
const valid: ProviderName[] = ['photon-self', 'photon', 'nominatim'];
|
||||
const parsed = raw
|
||||
.split(',')
|
||||
.map((s) => s.trim().toLowerCase())
|
||||
|
|
|
|||
|
|
@ -1,9 +1,8 @@
|
|||
/**
|
||||
* mana-geocoding — Self-hosted geocoding proxy.
|
||||
*
|
||||
* Wraps a local Pelias instance with caching and OSM → PlaceCategory
|
||||
* mapping. All geocoding queries stay within our infrastructure —
|
||||
* no user location data leaves the network.
|
||||
* mana-geocoding — geocoding proxy with provider chain (photon-self →
|
||||
* public photon → public nominatim) and aggressive caching. Sensitive
|
||||
* queries are blocked from public providers; coordinates are quantized
|
||||
* before they are forwarded to public providers.
|
||||
*/
|
||||
|
||||
import { createApp } from './app';
|
||||
|
|
@ -12,13 +11,17 @@ import { loadConfig } from './config';
|
|||
const config = loadConfig();
|
||||
|
||||
console.log(`mana-geocoding starting on port ${config.port}...`);
|
||||
console.log(`Pelias API: ${config.pelias.apiUrl}`);
|
||||
console.log(`Providers: ${config.providers.enabled.join(', ')}`);
|
||||
if (config.photonSelf.apiUrl) {
|
||||
console.log(`photon-self: ${config.photonSelf.apiUrl}`);
|
||||
}
|
||||
|
||||
export default {
|
||||
port: config.port,
|
||||
// Bun's default idleTimeout is 10s — too tight for Pelias cold queries
|
||||
// that need to hit Elasticsearch and libpostal. 60s is generous enough
|
||||
// for the worst-case while still cutting off stuck connections.
|
||||
// Bun's default idleTimeout is 10s — too tight for cold cross-LAN
|
||||
// queries to photon-self that hit OpenSearch on a fresh shard. 60s is
|
||||
// generous enough for the worst case while still cutting off stuck
|
||||
// connections.
|
||||
idleTimeout: 60,
|
||||
fetch: createApp(config).fetch,
|
||||
};
|
||||
|
|
|
|||
|
|
@ -1,184 +0,0 @@
|
|||
/**
|
||||
* Unit tests for the Pelias→PlaceCategory mapping.
|
||||
*
|
||||
* This is the subtle part of the service: a Pelias venue often has
|
||||
* multiple categories (e.g. a restaurant is `['food','retail','nightlife']`)
|
||||
* and we need to pick the most specific one. The priority list in
|
||||
* category-map.ts encodes that choice, and these tests lock it in.
|
||||
*/
|
||||
|
||||
import { describe, it, expect } from 'bun:test';
|
||||
import { mapPeliasToPlaceCategory } from '../category-map';
|
||||
|
||||
/** One table row: the test title, the mapper inputs and the expected category. */
interface MappingCase {
	title: string;
	categories: string[] | null | undefined;
	/** Pelias layer hint; when absent the mapper is called with a single argument. */
	layer?: string;
	expected: string;
}

/** A named group of cases — becomes one nested `describe`. */
interface MappingSuite {
	name: string;
	cases: MappingCase[];
}

const MAPPING_SUITES: MappingSuite[] = [
	{
		name: 'priority-ordered multi-category resolution',
		cases: [
			{
				title: 'picks food over retail for a restaurant',
				categories: ['food', 'retail', 'nightlife'],
				expected: 'food',
			},
			{
				// The Pelias OSM taxonomy tags a bakery as food + retail.
				title: 'picks food over retail for a bakery',
				categories: ['food', 'retail'],
				expected: 'food',
			},
			{
				title: 'picks food over nightlife for a cafe',
				categories: ['food', 'nightlife'],
				expected: 'food',
			},
			{
				// Pelias tags car_rental as transport + professional.
				title: 'picks transit over professional for a car_rental',
				categories: ['transport', 'professional'],
				expected: 'transit',
			},
			{
				title: 'picks transit for a bus_station (multiple transport subcategories)',
				categories: ['transport', 'transport:public', 'transport:bus'],
				expected: 'transit',
			},
			{
				title: 'picks transit for a station (transport:rail)',
				categories: ['transport', 'transport:public', 'transport:station', 'transport:rail'],
				expected: 'transit',
			},
		],
	},
	{
		name: 'single-category resolution',
		cases: [
			{ title: 'maps food to food', categories: ['food'], expected: 'food' },
			{ title: 'maps retail to shopping', categories: ['retail'], expected: 'shopping' },
			{ title: 'maps transport to transit', categories: ['transport'], expected: 'transit' },
			{ title: 'maps education to work', categories: ['education'], expected: 'work' },
			{ title: 'maps professional to work', categories: ['professional'], expected: 'work' },
			{ title: 'maps government to work', categories: ['government'], expected: 'work' },
			{ title: 'maps finance to work', categories: ['finance'], expected: 'work' },
			{ title: 'maps entertainment to leisure', categories: ['entertainment'], expected: 'leisure' },
			{ title: 'maps nightlife to leisure', categories: ['nightlife'], expected: 'leisure' },
			{ title: 'maps recreation to leisure', categories: ['recreation'], expected: 'leisure' },
			{ title: 'maps health to other', categories: ['health'], expected: 'other' },
			{ title: 'maps religion to other', categories: ['religion'], expected: 'other' },
		],
	},
	{
		// Literal category arrays observed in the Konstanz DACH index during
		// the 2026-04-11 deploy verification. They are pinned here as
		// regression cases so a later priority change cannot silently break
		// address search in production.
		name: 'real-world Pelias venue categories',
		cases: [
			{
				title: 'Konzil Restaurant Konstanz → food',
				categories: ['food', 'retail', 'nightlife'],
				expected: 'food',
			},
			{
				title: 'Stuttgart Hauptbahnhof → transit',
				categories: ['transport', 'transport:public', 'transport:station', 'transport:rail'],
				expected: 'transit',
			},
			{ title: 'Physiotherapie-Schule → work', categories: ['education'], expected: 'work' },
			{ title: 'MX-Park (Rennstrecke) → leisure', categories: ['recreation'], expected: 'leisure' },
			{
				// Pelias tags KulturKiosk as professional.
				title: 'KulturKiosk → work',
				categories: ['professional'],
				expected: 'work',
			},
			{ title: 'Kölner Domshop → shopping', categories: ['retail'], expected: 'shopping' },
		],
	},
	{
		name: 'empty / null / unknown categories',
		cases: [
			{ title: 'returns other for empty array', categories: [], expected: 'other' },
			{ title: 'returns other for undefined', categories: undefined, expected: 'other' },
			{ title: 'returns other for null', categories: null, expected: 'other' },
			{
				title: 'returns other for unknown category strings',
				categories: ['random', 'unknown'],
				expected: 'other',
			},
			{
				title: 'picks known category even if unknown ones come first',
				categories: ['unknown', 'food'],
				expected: 'food',
			},
		],
	},
	{
		name: 'Pelias layer fallback',
		cases: [
			{
				title: 'uses layer hint for venue with no categories',
				categories: undefined,
				layer: 'venue',
				expected: 'other',
			},
			{
				title: 'uses layer hint for address',
				categories: undefined,
				layer: 'address',
				expected: 'other',
			},
			{
				title: 'uses layer hint for street',
				categories: undefined,
				layer: 'street',
				expected: 'other',
			},
			{
				title: 'uses layer hint for locality',
				categories: undefined,
				layer: 'locality',
				expected: 'other',
			},
			{
				// A venue that carries the food category must resolve to food, not other.
				title: 'prefers categories over layer hint',
				categories: ['food'],
				layer: 'venue',
				expected: 'food',
			},
		],
	},
];

/** Calls the mapper with the same arity the hand-written tests used. */
function resolveCase(testCase: MappingCase) {
	return testCase.layer === undefined
		? mapPeliasToPlaceCategory(testCase.categories)
		: mapPeliasToPlaceCategory(testCase.categories, testCase.layer);
}

describe('mapPeliasToPlaceCategory', () => {
	for (const suite of MAPPING_SUITES) {
		describe(suite.name, () => {
			for (const testCase of suite.cases) {
				it(testCase.title, () => {
					expect(resolveCase(testCase)).toBe(testCase.expected);
				});
			}
		});
	}
});
|
||||
|
|
@ -2,8 +2,6 @@
|
|||
* Unit tests for the raw-OSM-tag → PlaceCategory mapper.
|
||||
*
|
||||
* Covers the cases Photon and Nominatim emit for typical DACH queries.
|
||||
* The Pelias mapper has its own tests in category-map.test.ts; this file
|
||||
* tests *only* the raw-OSM-tag path used by the public-API fallbacks.
|
||||
*/
|
||||
|
||||
import { describe, expect, it } from 'bun:test';
|
||||
|
|
@ -54,7 +52,7 @@ describe('mapOsmTagToPlaceCategory', () => {
|
|||
expect(mapOsmTagToPlaceCategory('aeroway', 'aerodrome')).toBe('transit');
|
||||
});
|
||||
it('amenity:car_rental → transit', () => {
|
||||
// Matches Pelias mapper's "car_rental → transit" decision
|
||||
// car_rental → transit (transport-flavored)
|
||||
expect(mapOsmTagToPlaceCategory('amenity', 'car_rental')).toBe('transit');
|
||||
});
|
||||
});
|
||||
|
|
@ -116,7 +114,7 @@ describe('mapOsmTagToPlaceCategory', () => {
|
|||
|
||||
describe('other (health/religion/unknown)', () => {
|
||||
it('amenity:hospital → other', () => {
|
||||
// Health goes to other (matches Pelias mapper)
|
||||
// Health goes to other
|
||||
expect(mapOsmTagToPlaceCategory('amenity', 'hospital')).toBe('other');
|
||||
});
|
||||
it('amenity:pharmacy → other', () => {
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
/**
|
||||
* Simple in-memory LRU cache with TTL for geocoding results.
|
||||
* Geocoding results rarely change, so we cache aggressively to
|
||||
* reduce load on the Pelias instance.
|
||||
* Geocoding results rarely change, so we cache to reduce load on
|
||||
* upstream providers.
|
||||
*/
|
||||
|
||||
interface CacheEntry<T> {
|
||||
|
|
@ -37,11 +37,10 @@ export class LRUCache<T> {
|
|||
/**
|
||||
* Insert or update a cache entry.
|
||||
*
|
||||
* @param ttlOverrideMs Optional per-entry TTL. Useful when results
|
||||
* from public-API providers should live longer than results from
|
||||
* the (frequently-changing) local Pelias index — e.g. 7 days for
|
||||
* Photon/Nominatim answers, 24 hours for Pelias answers. When
|
||||
* omitted, the constructor's default TTL applies.
|
||||
* @param ttlOverrideMs Optional per-entry TTL. The route layer uses
|
||||
* this so public-fallback answers expire faster than local-provider
|
||||
* answers — see `ttlFor()` in routes/geocode.ts. When omitted, the
|
||||
* constructor's default TTL applies.
|
||||
*/
|
||||
set(key: string, value: T, ttlOverrideMs?: number): void {
|
||||
// Delete first so re-insert goes to end
|
||||
|
|
|
|||
|
|
@ -1,89 +1,10 @@
|
|||
/**
|
||||
* Maps Pelias categories (OSM taxonomy) to our 7 Places categories.
|
||||
*
|
||||
* Pelias' openstreetmap importer tags venues with categories from its
|
||||
* built-in taxonomy (food, retail, transport, health, education, …).
|
||||
* We collapse those into the simpler Places enum:
|
||||
* The 7 Places categories used across the geocoding wrapper and clients.
|
||||
*
|
||||
* home · work · food · shopping · transit · leisure · other
|
||||
*
|
||||
* A venue can have multiple Pelias categories (e.g. a restaurant is
|
||||
* tagged `['food', 'retail', 'nightlife']`). We pick the most specific
|
||||
* one in priority order rather than the first — a restaurant should be
|
||||
* "food" even though "retail" also matches.
|
||||
* Provider-specific mappers (see `osm-category-map.ts` for Photon /
|
||||
* Nominatim) collapse the upstream taxonomy into this shape. `home` is
|
||||
* never auto-detected — it's set manually by the user.
|
||||
*/
|
||||
|
||||
export type PlaceCategory = 'home' | 'work' | 'food' | 'shopping' | 'transit' | 'leisure' | 'other';

/** One entry of the Pelias → PlaceCategory priority table. */
interface PeliasRule {
	/** Pelias category tag to look for, e.g. `food` or `transport`. */
	readonly tag: string;
	/** Places category returned when the rule matches. */
	readonly category: PlaceCategory;
	/**
	 * When true the rule also matches namespaced subcategories of `tag`
	 * (`transport:rail`, `transport:station`, …), not just the bare tag.
	 */
	readonly matchSubcategories?: boolean;
}

/**
 * Priority-ordered: the first rule matching any of a venue's categories
 * wins. Earlier entries are more specific, so "food" beats "retail" and
 * "transport" beats "professional".
 */
const PELIAS_PRIORITY: readonly PeliasRule[] = [
	// Food is the strongest signal — a restaurant is food, not retail.
	{ tag: 'food', category: 'food' },

	// Transit: the bare tag plus every `transport:*` subcategory. Matching
	// by namespace (instead of enumerating public/air/sea/bus/taxi) means
	// a venue tagged only with e.g. `transport:rail` still resolves.
	{ tag: 'transport', category: 'transit', matchSubcategories: true },

	// Shopping — explicit retail markers.
	{ tag: 'retail', category: 'shopping' },

	// Leisure / entertainment / recreation.
	{ tag: 'entertainment', category: 'leisure' },
	{ tag: 'nightlife', category: 'leisure' },
	{ tag: 'recreation', category: 'leisure' },

	// Work-ish.
	{ tag: 'education', category: 'work' },
	{ tag: 'professional', category: 'work' },
	{ tag: 'government', category: 'work' },
	{ tag: 'finance', category: 'work' },

	// Health / religion deliberately resolve to other. Listed explicitly
	// to document the decision even though `other` is also the default.
	{ tag: 'health', category: 'other' },
	{ tag: 'religion', category: 'other' },
];

/**
 * Derive a PlaceCategory from a Pelias feature's category array.
 *
 * A venue can carry several Pelias categories (a restaurant is tagged
 * `['food', 'retail', 'nightlife']`); the result is the category of the
 * highest-priority rule in `PELIAS_PRIORITY` that matches any of them,
 * independent of the order of the input array.
 *
 * @param categories The `category` array from a Pelias feature's properties.
 *   `null`, `undefined`, an empty array, or only unknown tags yield `'other'`.
 * @param peliasLayer The Pelias layer (venue, address, street, …). Every layer
 *   maps to `'other'`, so it does not influence the result; the parameter is
 *   kept so existing two-argument callers continue to compile.
 * @returns The matching PlaceCategory, or `'other'` when nothing matches.
 */
export function mapPeliasToPlaceCategory(
	categories?: string[] | null,
	peliasLayer?: string
): PlaceCategory {
	// The layer hint never changed the outcome (all layers → 'other');
	// reference it once so the parameter is not reported as unused.
	void peliasLayer;

	if (!Array.isArray(categories) || categories.length === 0) {
		return 'other';
	}

	for (const rule of PELIAS_PRIORITY) {
		const namespace = `${rule.tag}:`;
		const matched = categories.some(
			(candidate) =>
				// Category arrays originate from upstream JSON; skip non-strings
				// rather than throwing on `startsWith`.
				typeof candidate === 'string' &&
				(candidate === rule.tag ||
					(rule.matchSubcategories === true && candidate.startsWith(namespace)))
		);
		if (matched) return rule.category;
	}

	return 'other';
}
|
||||
|
|
|
|||
|
|
@ -2,15 +2,9 @@
|
|||
* Maps raw OSM `class:type` tags (Photon's `osm_key:osm_value`,
|
||||
* Nominatim's `class:type`) to our 7 PlaceCategories.
|
||||
*
|
||||
* Pelias has a curated multi-category taxonomy (`food`, `retail`,
|
||||
* `transport`, …) that we map via `category-map.ts`. Photon and Nominatim
|
||||
* return raw OSM tags instead — `amenity:restaurant`, `shop:supermarket`,
|
||||
* `public_transport:station`, etc. — so they need a different lookup.
|
||||
*
|
||||
* The list below is intentionally narrow: it only covers tags we actually
|
||||
* see in real Photon/Nominatim responses for DACH queries. Anything else
|
||||
* falls through to `other`, which matches the Pelias mapper's behavior for
|
||||
* unknown categories.
|
||||
* falls through to `other`.
|
||||
*
|
||||
* If a query returns a tag we don't handle, that's the signal to add it
|
||||
* here — not to try to enumerate all 1000+ OSM types.
|
||||
|
|
@ -25,8 +19,8 @@ interface Tag {
|
|||
|
||||
/**
|
||||
* Priority-ordered: first match wins. More-specific entries (with a
|
||||
* `value`) come before generic key-only entries. Matches Pelias's
|
||||
* "food beats retail" priority intent.
|
||||
* `value`) come before generic key-only entries. Same "food beats retail"
|
||||
* priority intent as the upstream taxonomies.
|
||||
*/
|
||||
const OSM_RULES: Array<{ match: Tag; category: PlaceCategory }> = [
|
||||
// ── Food (highest priority — restaurants are food, even when also
|
||||
|
|
@ -82,7 +76,7 @@ const OSM_RULES: Array<{ match: Tag; category: PlaceCategory }> = [
|
|||
{ match: { key: 'amenity', value: 'embassy' }, category: 'work' },
|
||||
{ match: { key: 'office' }, category: 'work' },
|
||||
|
||||
// ── Health / religion → other (matches Pelias mapper) ───────────
|
||||
// ── Health / religion → other ───────────────────────────────────
|
||||
{ match: { key: 'amenity', value: 'hospital' }, category: 'other' },
|
||||
{ match: { key: 'amenity', value: 'clinic' }, category: 'other' },
|
||||
{ match: { key: 'amenity', value: 'doctors' }, category: 'other' },
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@
|
|||
* not telling Photon "user is at THIS HOUSE". Reverse geocoding
|
||||
* against the city block instead of the building is acceptable.
|
||||
*
|
||||
* Pelias and other LAN-local providers always get the original
|
||||
* Photon-self and other LAN-local providers always get the original
|
||||
* full-precision coordinates — quantization only applies on the way
|
||||
* out to the public internet.
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@
|
|||
*
|
||||
* Trade-offs:
|
||||
* - False positives are OK (a user searching for "Praxis Müller" who
|
||||
* wanted the dance studio gets 0 results when Pelias is down — not
|
||||
* wanted the dance studio gets 0 results when photon-self is down — not
|
||||
* ideal but better than a privacy leak)
|
||||
* - False negatives are NOT OK (we'd rather over-block than under-block)
|
||||
* - The list is intentionally narrow: only words with clear medical or
|
||||
|
|
|
|||
|
|
@ -68,7 +68,7 @@ const SEARCH: SearchRequest = { q: 'test', limit: 5, lang: 'de' };
|
|||
|
||||
describe('ProviderChain — happy path', () => {
|
||||
it('returns the first provider that succeeds', async () => {
|
||||
const a = new FakeProvider('pelias');
|
||||
const a = new FakeProvider('photon-self');
|
||||
const b = new FakeProvider('photon');
|
||||
const chain = new ProviderChain({
|
||||
providers: [a, b],
|
||||
|
|
@ -76,29 +76,29 @@ describe('ProviderChain — happy path', () => {
|
|||
});
|
||||
const res = await chain.search(SEARCH);
|
||||
expect(res.ok).toBe(true);
|
||||
expect(res.provider).toBe('pelias');
|
||||
expect(res.tried).toEqual(['pelias']);
|
||||
expect(res.provider).toBe('photon-self');
|
||||
expect(res.tried).toEqual(['photon-self']);
|
||||
expect(a.calls.search).toBe(1);
|
||||
expect(b.calls.search).toBe(0);
|
||||
});
|
||||
|
||||
it('honors the providers array order', async () => {
|
||||
const photon = new FakeProvider('photon');
|
||||
const pelias = new FakeProvider('pelias');
|
||||
const local = new FakeProvider('photon-self');
|
||||
// photon first this time
|
||||
const chain = new ProviderChain({
|
||||
providers: [photon, pelias],
|
||||
providers: [photon, local],
|
||||
healthCacheMs: 60_000,
|
||||
});
|
||||
const res = await chain.search(SEARCH);
|
||||
expect(res.provider).toBe('photon');
|
||||
expect(pelias.calls.search).toBe(0);
|
||||
expect(local.calls.search).toBe(0);
|
||||
});
|
||||
});
|
||||
|
||||
describe('ProviderChain — failover', () => {
|
||||
it('falls through on unreachable, returns next provider', async () => {
|
||||
const a = new FakeProvider('pelias', {
|
||||
const a = new FakeProvider('photon-self', {
|
||||
search: async () => ({ ok: false, kind: 'unreachable', status: 503 }),
|
||||
});
|
||||
const b = new FakeProvider('photon');
|
||||
|
|
@ -106,7 +106,7 @@ describe('ProviderChain — failover', () => {
|
|||
const res = await chain.search(SEARCH);
|
||||
expect(res.ok).toBe(true);
|
||||
expect(res.provider).toBe('photon');
|
||||
expect(res.tried).toEqual(['pelias', 'photon']);
|
||||
expect(res.tried).toEqual(['photon-self', 'photon']);
|
||||
});
|
||||
|
||||
it('falls through on rate_limited', async () => {
|
||||
|
|
@ -121,20 +121,20 @@ describe('ProviderChain — failover', () => {
|
|||
|
||||
it('STOPS on empty results — does not consume fallback budget', async () => {
|
||||
// A clean empty answer is definitive: don't burn through public APIs.
|
||||
const a = new FakeProvider('pelias', {
|
||||
const a = new FakeProvider('photon-self', {
|
||||
search: async () => ({ ok: true, results: [] }),
|
||||
});
|
||||
const b = new FakeProvider('photon');
|
||||
const chain = new ProviderChain({ providers: [a, b], healthCacheMs: 60_000 });
|
||||
const res = await chain.search(SEARCH);
|
||||
expect(res.ok).toBe(true);
|
||||
expect(res.provider).toBe('pelias');
|
||||
expect(res.provider).toBe('photon-self');
|
||||
expect(res.results).toEqual([]);
|
||||
expect(b.calls.search).toBe(0);
|
||||
});
|
||||
|
||||
it('returns ok:false when all providers fail', async () => {
|
||||
const a = new FakeProvider('pelias', {
|
||||
const a = new FakeProvider('photon-self', {
|
||||
search: async () => ({ ok: false, kind: 'unreachable' }),
|
||||
});
|
||||
const b = new FakeProvider('photon', {
|
||||
|
|
@ -144,23 +144,23 @@ describe('ProviderChain — failover', () => {
|
|||
const res = await chain.search(SEARCH);
|
||||
expect(res.ok).toBe(false);
|
||||
expect(res.results).toEqual([]);
|
||||
expect(res.tried).toEqual(['pelias', 'photon']);
|
||||
expect(res.tried).toEqual(['photon-self', 'photon']);
|
||||
});
|
||||
});
|
||||
|
||||
describe('ProviderChain — health cache', () => {
|
||||
it('skips a provider whose health probe returned false', async () => {
|
||||
const dead = new FakeProvider('pelias', { health: async () => false });
|
||||
const dead = new FakeProvider('photon-self', { health: async () => false });
|
||||
const alive = new FakeProvider('photon');
|
||||
const chain = new ProviderChain({ providers: [dead, alive], healthCacheMs: 60_000 });
|
||||
const res = await chain.search(SEARCH);
|
||||
expect(res.tried).toEqual(['photon']); // pelias was skipped, not tried
|
||||
expect(res.tried).toEqual(['photon']); // local was skipped, not tried
|
||||
expect(dead.calls.search).toBe(0);
|
||||
expect(dead.calls.health).toBe(1);
|
||||
});
|
||||
|
||||
it('caches health for healthCacheMs — only one probe per window', async () => {
|
||||
const a = new FakeProvider('pelias');
|
||||
const a = new FakeProvider('photon-self');
|
||||
const chain = new ProviderChain({ providers: [a], healthCacheMs: 60_000 });
|
||||
await chain.search(SEARCH);
|
||||
await chain.search(SEARCH);
|
||||
|
|
@ -171,18 +171,19 @@ describe('ProviderChain — health cache', () => {
|
|||
|
||||
it('marks provider unhealthy when search fails, skipping it next time', async () => {
|
||||
let failNext = true;
|
||||
const flaky = new FakeProvider('pelias', {
|
||||
search: async () => (failNext ? { ok: false, kind: 'unreachable' } : okResults('pelias')),
|
||||
const flaky = new FakeProvider('photon-self', {
|
||||
search: async () =>
|
||||
failNext ? { ok: false, kind: 'unreachable' } : okResults('photon-self'),
|
||||
});
|
||||
const alive = new FakeProvider('photon');
|
||||
const chain = new ProviderChain({ providers: [flaky, alive], healthCacheMs: 60_000 });
|
||||
|
||||
// First call: pelias fails → cached unhealthy → photon serves
|
||||
// First call: local fails → cached unhealthy → photon serves
|
||||
const r1 = await chain.search(SEARCH);
|
||||
expect(r1.provider).toBe('photon');
|
||||
expect(r1.tried).toEqual(['pelias', 'photon']);
|
||||
expect(r1.tried).toEqual(['photon-self', 'photon']);
|
||||
|
||||
// Second call: pelias is in unhealthy cache, not tried at all
|
||||
// Second call: local is in unhealthy cache, not tried at all
|
||||
failNext = false; // would now succeed but never gets called
|
||||
const r2 = await chain.search(SEARCH);
|
||||
expect(r2.provider).toBe('photon');
|
||||
|
|
@ -191,7 +192,7 @@ describe('ProviderChain — health cache', () => {
|
|||
});
|
||||
|
||||
it('refreshes health after cache expires', async () => {
|
||||
const dead = new FakeProvider('pelias', { health: async () => false });
|
||||
const dead = new FakeProvider('photon-self', { health: async () => false });
|
||||
const alive = new FakeProvider('photon');
|
||||
// 1ms cache for fast test
|
||||
const chain = new ProviderChain({ providers: [dead, alive], healthCacheMs: 1 });
|
||||
|
|
@ -203,7 +204,7 @@ describe('ProviderChain — health cache', () => {
|
|||
});
|
||||
|
||||
it('clearHealthCache forces re-probe', async () => {
|
||||
const a = new FakeProvider('pelias');
|
||||
const a = new FakeProvider('photon-self');
|
||||
const chain = new ProviderChain({ providers: [a], healthCacheMs: 60_000 });
|
||||
await chain.search(SEARCH);
|
||||
expect(a.calls.health).toBe(1);
|
||||
|
|
@ -215,19 +216,19 @@ describe('ProviderChain — health cache', () => {
|
|||
|
||||
describe('ProviderChain — getHealthSnapshot', () => {
|
||||
it('reports per-provider health + age', async () => {
|
||||
const ok = new FakeProvider('pelias');
|
||||
const ok = new FakeProvider('photon-self');
|
||||
const dead = new FakeProvider('photon', { health: async () => false });
|
||||
const chain = new ProviderChain({ providers: [ok, dead], healthCacheMs: 60_000 });
|
||||
await chain.search(SEARCH);
|
||||
const snap = chain.getHealthSnapshot();
|
||||
expect(snap).toHaveLength(2);
|
||||
expect(snap[0]).toMatchObject({ name: 'pelias', healthy: true });
|
||||
expect(snap[0]).toMatchObject({ name: 'photon-self', healthy: true });
|
||||
expect(snap[1]).toMatchObject({ name: 'photon', healthy: false });
|
||||
expect(snap[0].ageMs).toBeLessThan(1000);
|
||||
});
|
||||
|
||||
it('reports Infinity age for never-probed providers', async () => {
|
||||
const a = new FakeProvider('pelias');
|
||||
const a = new FakeProvider('photon-self');
|
||||
const chain = new ProviderChain({ providers: [a], healthCacheMs: 60_000 });
|
||||
const snap = chain.getHealthSnapshot();
|
||||
expect(snap[0].ageMs).toBe(Infinity);
|
||||
|
|
@ -237,7 +238,7 @@ describe('ProviderChain — getHealthSnapshot', () => {
|
|||
|
||||
describe('ProviderChain — reverse', () => {
|
||||
it('uses the same provider order for reverse', async () => {
|
||||
const a = new FakeProvider('pelias', {
|
||||
const a = new FakeProvider('photon-self', {
|
||||
reverse: async () => ({ ok: false, kind: 'unreachable' }),
|
||||
});
|
||||
const b = new FakeProvider('photon', { privacy: 'public' });
|
||||
|
|
@ -251,26 +252,26 @@ describe('ProviderChain — reverse', () => {
|
|||
|
||||
describe('ProviderChain — privacy / localOnly mode', () => {
|
||||
it('skips public providers when localOnly is true', async () => {
|
||||
const localPelias = new FakeProvider('pelias', { privacy: 'local' });
|
||||
const localProvider = new FakeProvider('photon-self', { privacy: 'local' });
|
||||
const publicPhoton = new FakeProvider('photon', { privacy: 'public' });
|
||||
const publicNominatim = new FakeProvider('nominatim', { privacy: 'public' });
|
||||
const chain = new ProviderChain({
|
||||
providers: [localPelias, publicPhoton, publicNominatim],
|
||||
providers: [localProvider, publicPhoton, publicNominatim],
|
||||
healthCacheMs: 60_000,
|
||||
});
|
||||
|
||||
const res = await chain.search(SEARCH, undefined, { localOnly: true });
|
||||
|
||||
expect(res.ok).toBe(true);
|
||||
expect(res.provider).toBe('pelias');
|
||||
expect(localPelias.calls.search).toBe(1);
|
||||
expect(res.provider).toBe('photon-self');
|
||||
expect(localProvider.calls.search).toBe(1);
|
||||
// Public providers must not even have their search() called
|
||||
expect(publicPhoton.calls.search).toBe(0);
|
||||
expect(publicNominatim.calls.search).toBe(0);
|
||||
});
|
||||
|
||||
it('falls back to the second LOCAL provider when the first local fails', async () => {
|
||||
const local1 = new FakeProvider('pelias', {
|
||||
const local1 = new FakeProvider('photon-self', {
|
||||
privacy: 'local',
|
||||
search: async () => ({ ok: false, kind: 'unreachable' }),
|
||||
});
|
||||
|
|
@ -313,7 +314,7 @@ describe('ProviderChain — privacy / localOnly mode', () => {
|
|||
});
|
||||
|
||||
it('returns notice: fallback_used when a public provider serves a non-sensitive query', async () => {
|
||||
const localDown = new FakeProvider('pelias', {
|
||||
const localDown = new FakeProvider('photon-self', {
|
||||
privacy: 'local',
|
||||
health: async () => false,
|
||||
});
|
||||
|
|
@ -329,10 +330,10 @@ describe('ProviderChain — privacy / localOnly mode', () => {
|
|||
});
|
||||
|
||||
it('NO notice when the local provider serves a non-sensitive query', async () => {
|
||||
const localUp = new FakeProvider('pelias', { privacy: 'local' });
|
||||
const localUp = new FakeProvider('photon-self', { privacy: 'local' });
|
||||
const chain = new ProviderChain({ providers: [localUp], healthCacheMs: 60_000 });
|
||||
const res = await chain.search(SEARCH);
|
||||
expect(res.provider).toBe('pelias');
|
||||
expect(res.provider).toBe('photon-self');
|
||||
expect(res.notice).toBeUndefined();
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
/**
|
||||
* Tests for normalizing Nominatim's flat-JSON shape into our GeocodingResult.
|
||||
*
|
||||
* Nominatim differs from Photon/Pelias in three subtle ways we lock in:
|
||||
* Nominatim differs from Photon in three subtle ways we lock in:
|
||||
* 1. Lat/lon are STRINGS, not numbers — the normalizer must parseFloat.
|
||||
* 2. Display name is a comma-noisy hierarchy ("Konzil, Hafenstraße,
|
||||
* Konstanz, Konstanz, Regierungsbezirk Freiburg, Baden-Württemberg,
|
||||
|
|
@ -135,16 +135,4 @@ describe('normalizeNominatimResult', () => {
|
|||
});
|
||||
expect(result.provider).toBe('nominatim');
|
||||
});
|
||||
|
||||
it('does not set peliasCategories', () => {
|
||||
// Consumer side keys off the absence of this field as a "fallback
|
||||
// provider" signal.
|
||||
const result = normalizeNominatimResult({
|
||||
lat: '47.0',
|
||||
lon: '9.0',
|
||||
class: 'amenity',
|
||||
type: 'restaurant',
|
||||
});
|
||||
expect(result.peliasCategories).toBeUndefined();
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -44,8 +44,6 @@ describe('normalizePhotonFeature', () => {
|
|||
});
|
||||
expect(result.confidence).toBeCloseTo(0.78, 2);
|
||||
expect(result.provider).toBe('photon');
|
||||
// peliasCategories deliberately absent for non-Pelias providers
|
||||
expect(result.peliasCategories).toBeUndefined();
|
||||
});
|
||||
|
||||
it('builds label from structured fields', () => {
|
||||
|
|
@ -111,7 +109,7 @@ describe('normalizePhotonFeature', () => {
|
|||
});
|
||||
|
||||
it('coordinates: Photon emits [lon, lat] — normalizer must NOT swap', () => {
|
||||
// Catches the all-too-easy lon/lat flip when porting from Pelias.
|
||||
// Catches the all-too-easy lon/lat flip in Photon's GeoJSON.
|
||||
const result = normalizePhotonFeature({
|
||||
type: 'Feature',
|
||||
geometry: { type: 'Point', coordinates: [9.1758, 47.6634] },
|
||||
|
|
|
|||
|
|
@ -47,7 +47,7 @@ export type ChainNotice =
|
|||
/** Sensitive query was blocked from public providers and no local
|
||||
* provider was healthy → no results, but the absence is intentional. */
|
||||
| 'sensitive_local_unavailable'
|
||||
/** A non-Pelias provider served the request (Pelias was down). */
|
||||
/** A public provider served the request (the local provider was down). */
|
||||
| 'fallback_used';
|
||||
|
||||
export interface ChainOptions {
|
||||
|
|
@ -161,9 +161,9 @@ export class ProviderChain {
|
|||
}
|
||||
|
||||
// Stale or missing — refresh. We don't await this aggressively in
|
||||
// happy paths (Pelias up + healthy is the cheapest case), but on
|
||||
// cold-start every entry is missing so the first request pays for
|
||||
// one health probe per provider.
|
||||
// happy paths (photon-self up + healthy is the cheapest case),
|
||||
// but on cold-start every entry is missing so the first request
|
||||
// pays for one health probe per provider.
|
||||
const healthy = await provider.health(signal);
|
||||
this.health.set(provider.name, { healthy, checkedAt: now });
|
||||
if (!healthy) {
|
||||
|
|
|
|||
|
|
@ -6,9 +6,9 @@
|
|||
* search/reverse. A custom `User-Agent` is required (Nominatim returns
|
||||
* 403 to default-UA fetches).
|
||||
*
|
||||
* Compared to Pelias/Photon, Nominatim returns a single flat array
|
||||
* rather than GeoJSON. We adapt the shape and synthesize a confidence
|
||||
* score from `importance`.
|
||||
* Unlike Photon, Nominatim returns a single flat array rather than
|
||||
* GeoJSON. We adapt the shape and synthesize a confidence score from
|
||||
* `importance`.
|
||||
*
|
||||
* https://nominatim.org/release-docs/develop/api/Search/
|
||||
* https://operations.osmfoundation.org/policies/nominatim/
|
||||
|
|
|
|||
|
|
@ -1,178 +0,0 @@
|
|||
/**
|
||||
* Pelias provider — primary backend, self-hosted with the DACH OSM index.
|
||||
*
|
||||
* Forward-search uses /autocomplete first (fast venue match) and falls
|
||||
* back to /search if autocomplete returns zero features (autocomplete
|
||||
* deliberately excludes the address layer for perf).
|
||||
*/
|
||||
|
||||
import { mapPeliasToPlaceCategory } from '../lib/category-map';
|
||||
import type {
|
||||
GeocodingProvider,
|
||||
GeocodingResult,
|
||||
ProviderResponse,
|
||||
ReverseRequest,
|
||||
SearchRequest,
|
||||
} from './types';
|
||||
|
||||
/**
 * Connection settings for the Pelias provider. The provider only ever
 * reads these values, so both fields are `readonly`.
 */
export interface PeliasConfig {
	/**
	 * Base URL of the Pelias API. Request paths such as `/status`,
	 * `/autocomplete?…` and `/reverse?…` are appended to it verbatim, so it
	 * should not end with a trailing slash.
	 */
	readonly apiUrl: string;
	/** Per-request timeout in milliseconds, passed to `AbortSignal.timeout()`. */
	readonly timeoutMs: number;
}
|
||||
|
||||
export class PeliasProvider implements GeocodingProvider {
|
||||
readonly name = 'pelias' as const;
|
||||
readonly privacy = 'local' as const;
|
||||
|
||||
constructor(private readonly config: PeliasConfig) {}
|
||||
|
||||
async search(req: SearchRequest, signal?: AbortSignal): Promise<ProviderResponse> {
|
||||
const params = new URLSearchParams({
|
||||
text: req.q.trim(),
|
||||
size: String(req.limit),
|
||||
lang: req.lang,
|
||||
});
|
||||
if (req.focusLat && req.focusLon) {
|
||||
params.set('focus.point.lat', req.focusLat);
|
||||
params.set('focus.point.lon', req.focusLon);
|
||||
}
|
||||
|
||||
// /autocomplete first (fast venue match), then /search if empty.
|
||||
// Both attempts are wrapped in the same external timeout signal so
|
||||
// a cumulative slow Pelias still falls through to the next provider.
|
||||
try {
|
||||
const ac = await this.fetch(`/autocomplete?${params}`, signal);
|
||||
if (!ac.ok) return { ok: false, kind: 'unreachable', status: ac.status };
|
||||
let features = ac.features;
|
||||
|
||||
if (features.length === 0) {
|
||||
const s = await this.fetch(`/search?${params}`, signal);
|
||||
if (s.ok) features = s.features;
|
||||
// /search returning a non-OK after /autocomplete returned OK-but-empty
|
||||
// is a clean zero-results answer, not a fall-through. We trust the
|
||||
// successful autocomplete probe.
|
||||
}
|
||||
|
||||
return { ok: true, results: features.map(normalizePeliasFeature) };
|
||||
} catch (e) {
|
||||
return { ok: false, kind: 'unreachable', error: errorMessage(e) };
|
||||
}
|
||||
}
|
||||
|
||||
async reverse(req: ReverseRequest, signal?: AbortSignal): Promise<ProviderResponse> {
|
||||
const params = new URLSearchParams({
|
||||
'point.lat': req.lat,
|
||||
'point.lon': req.lon,
|
||||
size: '3',
|
||||
lang: req.lang,
|
||||
});
|
||||
|
||||
try {
|
||||
const r = await this.fetch(`/reverse?${params}`, signal);
|
||||
if (!r.ok) return { ok: false, kind: 'unreachable', status: r.status };
|
||||
return { ok: true, results: r.features.map(normalizePeliasFeature) };
|
||||
} catch (e) {
|
||||
return { ok: false, kind: 'unreachable', error: errorMessage(e) };
|
||||
}
|
||||
}
|
||||
|
||||
async health(signal?: AbortSignal): Promise<boolean> {
|
||||
try {
|
||||
const url = `${this.config.apiUrl}/status`;
|
||||
const res = await fetch(url, {
|
||||
signal: combineSignals(signal, AbortSignal.timeout(this.config.timeoutMs)),
|
||||
});
|
||||
// /v1/status doesn't exist on every Pelias version — a 404 still
|
||||
// means the server is up. Anything else (5xx, ECONNREFUSED, timeout)
|
||||
// is unhealthy.
|
||||
return res.ok || res.status === 404;
|
||||
} catch {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
private async fetch(
|
||||
path: string,
|
||||
signal?: AbortSignal
|
||||
): Promise<{ ok: boolean; status: number; features: PeliasFeature[] }> {
|
||||
const res = await fetch(`${this.config.apiUrl}${path}`, {
|
||||
signal: combineSignals(signal, AbortSignal.timeout(this.config.timeoutMs)),
|
||||
});
|
||||
if (!res.ok) return { ok: false, status: res.status, features: [] };
|
||||
const data = (await res.json()) as PeliasResponse;
|
||||
return { ok: true, status: res.status, features: data.features ?? [] };
|
||||
}
|
||||
}
|
||||
|
||||
// --- Pelias native types ---
|
||||
|
||||
/** Top-level payload of a Pelias endpoint: a GeoJSON FeatureCollection. */
interface PeliasResponse {
	type: 'FeatureCollection';
	/** Optional because the consuming code already defends against it being absent. */
	features?: PeliasFeature[];
}

/** One Pelias hit: a GeoJSON point feature with Pelias-specific properties. */
interface PeliasFeature {
	type: 'Feature';
	geometry: {
		type: 'Point';
		coordinates: [number, number]; // [lon, lat]
	};
	properties: {
		id?: string;
		/** Short name; used as the label fallback when `label` is missing. */
		name?: string;
		/** Display label. */
		label?: string;
		/** Provider-native confidence score. */
		confidence?: number;
		layer?: string;
		street?: string;
		housenumber?: string;
		postalcode?: string;
		locality?: string;
		region?: string;
		country?: string;
		category?: string[];
	};
}
|
||||
|
||||
/**
 * Convert a raw Pelias feature into the provider-agnostic result shape.
 *
 * @param feature A single feature from a Pelias response.
 * @returns The normalized result, tagged with `provider: 'pelias'`. Missing
 *          label/name become empty strings and missing confidence becomes 0.
 */
export function normalizePeliasFeature(feature: PeliasFeature): GeocodingResult {
	const { geometry, properties } = feature;
	// GeoJSON stores coordinates as [lon, lat].
	const [longitude, latitude] = geometry.coordinates;
	const displayName = properties.name || '';

	return {
		label: properties.label || displayName,
		name: displayName,
		latitude,
		longitude,
		address: {
			street: properties.street,
			houseNumber: properties.housenumber,
			postalCode: properties.postalcode,
			city: properties.locality,
			state: properties.region,
			country: properties.country,
		},
		category: mapPeliasToPlaceCategory(properties.category, properties.layer),
		peliasCategories: properties.category,
		confidence: properties.confidence ?? 0,
		provider: 'pelias',
	};
}
|
||||
|
||||
/**
 * Turn an arbitrary thrown value into a human-readable message.
 *
 * - `Error` instances yield their `message`.
 * - Primitives (and functions) are stringified with `String()`.
 * - Other objects use their own `toString()` when it is informative, else
 *   their JSON form, so a thrown `{ code: 'X' }` does not collapse into
 *   `"[object Object]"`.
 *
 * Never throws: values that cannot be stringified or serialized (null
 * prototype, circular references) fall back to their `Object.prototype.toString`
 * tag.
 *
 * @param e The caught value.
 * @returns A message suitable for logs and error payloads.
 */
function errorMessage(e: unknown): string {
	if (e instanceof Error) return e.message;
	if (typeof e !== 'object' || e === null) return String(e);

	try {
		const text = String(e);
		if (text !== '[object Object]') return text;
		// JSON.stringify can return undefined (e.g. toJSON returning undefined).
		const json: string | undefined = JSON.stringify(e);
		return json ?? text;
	} catch {
		// String() throws for null-prototype objects; JSON.stringify throws
		// for circular structures.
		return Object.prototype.toString.call(e);
	}
}
|
||||
|
||||
/**
 * Combine an external AbortSignal with our own timeout signal. AbortSignal.any
 * exists in Bun but TS typing is patchy across runtimes — small helper.
 *
 * - With exactly one defined signal, that signal is returned as-is.
 * - With none, the returned signal never aborts.
 * - Otherwise the returned signal aborts as soon as any source aborts and
 *   carries that source's `reason`. If a source is already aborted, the result
 *   is aborted immediately and no listeners are attached.
 *
 * Once one source fires, the listeners on the remaining sources are removed.
 *
 * @param signals Signals to merge; `undefined` entries are ignored.
 * @returns A signal that reflects the first abort among the sources.
 */
function combineSignals(...signals: Array<AbortSignal | undefined>): AbortSignal {
	const sources = signals.filter((s): s is AbortSignal => s != null);
	const [only] = sources;
	if (only !== undefined && sources.length === 1) return only;

	const ctrl = new AbortController();

	// Check for a pre-aborted source before attaching any listener.
	const alreadyAborted = sources.find((s) => s.aborted);
	if (alreadyAborted !== undefined) {
		ctrl.abort(alreadyAborted.reason);
		return ctrl.signal;
	}

	const detachers: Array<() => void> = [];
	for (const source of sources) {
		const onAbort = (): void => {
			// First abort wins; drop the listeners on every other source.
			for (const detach of detachers) detach();
			ctrl.abort(source.reason);
		};
		source.addEventListener('abort', onAbort, { once: true });
		detachers.push(() => source.removeEventListener('abort', onAbort));
	}
	return ctrl.signal;
}
|
||||
|
|
@ -5,15 +5,10 @@
|
|||
* importer). The HTTP shape is GeoJSON FeatureCollection with `properties`
|
||||
* holding `osm_key`/`osm_value` raw OSM tags + structured address fields.
|
||||
*
|
||||
* Compared to Pelias:
|
||||
* + No rate limit advertised, but be a polite neighbor: short timeouts,
|
||||
* no retries, cache aggressively.
|
||||
* + Reverse geocoding takes lon/lat (note the order — different from
|
||||
* Pelias's point.lat/point.lon). Easy to flip if not careful.
|
||||
* - No `confidence` field. We approximate from `importance` (0–1) when
|
||||
* present, else 0.5 as a neutral default.
|
||||
* - No DACH-specific tuning — German venue names sometimes lose umlauts
|
||||
* in display labels. Acceptable for a fallback.
|
||||
* Same class powers both `photon-self` (self-hosted, privacy: 'local')
|
||||
* and `photon` (public komoot.io, privacy: 'public'). Reverse-geocoding
|
||||
* takes lon/lat (note the order). Confidence is approximated from
|
||||
* `importance` (0–1) when present, else 0.5 as a neutral default.
|
||||
*/
|
||||
|
||||
import { mapOsmTagToPlaceCategory } from '../lib/osm-category-map';
|
||||
|
|
@ -207,9 +202,6 @@ export function normalizePhotonFeature(
|
|||
country: props.country,
|
||||
},
|
||||
category,
|
||||
// peliasCategories deliberately omitted — Photon has osm_key:osm_value
|
||||
// but the consumer side keys off the absence of this field as a
|
||||
// "result came from a fallback" signal.
|
||||
confidence: typeof props.importance === 'number' ? props.importance : 0.5,
|
||||
provider: providerName,
|
||||
};
|
||||
|
|
|
|||
|
|
@ -29,12 +29,8 @@ export interface GeocodingResult {
|
|||
};
|
||||
/** Our Places category, derived from the provider's native taxonomy. */
|
||||
category: PlaceCategory;
|
||||
/** Raw Pelias categories (food, retail, transport, …) — only present
|
||||
* when the result came from Pelias. Photon/Nominatim don't have an
|
||||
* equivalent multi-tag taxonomy. */
|
||||
peliasCategories?: string[];
|
||||
/** Confidence score 0–1. Pelias provides this natively; Photon/Nominatim
|
||||
* approximate it from `importance`. */
|
||||
/** Confidence score 0–1. Photon/Nominatim approximate it from
|
||||
* `importance`. */
|
||||
confidence: number;
|
||||
/** Which provider answered — useful for telemetry + UI hints
|
||||
* ("approximate match" badge for fallback providers). */
|
||||
|
|
@ -42,8 +38,8 @@ export interface GeocodingResult {
|
|||
}
|
||||
|
||||
/**
|
||||
* Provider identifiers. Two of these wrap the same `PhotonProvider`
|
||||
* class with different configs:
|
||||
* Provider identifiers. `photon-self` and `photon` both wrap the same
|
||||
* `PhotonProvider` class with different configs:
|
||||
*
|
||||
* - `photon-self`: self-hosted Photon (typically on mana-gpu),
|
||||
* `privacy: 'local'`. Eligible for sensitive queries.
|
||||
|
|
@ -55,7 +51,7 @@ export interface GeocodingResult {
|
|||
* tracks per-provider health. A single `photon` slot can't simultaneously
|
||||
* mean two different backends.
|
||||
*/
|
||||
export type ProviderName = 'pelias' | 'photon-self' | 'photon' | 'nominatim';
|
||||
export type ProviderName = 'photon-self' | 'photon' | 'nominatim';
|
||||
|
||||
export interface SearchRequest {
|
||||
q: string;
|
||||
|
|
|
|||
|
|
@ -21,10 +21,10 @@ interface CachedAnswer {
|
|||
}
|
||||
|
||||
/**
|
||||
* TTL chooser. Public-API results (Photon/Nominatim) get the longer TTL —
|
||||
* caching aggressively is the main privacy lever once the query has
|
||||
* already left our network. Local results stay on the shorter TTL because
|
||||
* the Pelias index can be re-imported; we don't want stale local data.
|
||||
* TTL chooser. Public-API results (photon/nominatim) get a shorter TTL
|
||||
* (1h) so a transient blip in photon-self doesn't pin stale fallback
|
||||
* answers in the cache for days. Local results (photon-self) get the
|
||||
* longer 24h TTL.
|
||||
*
|
||||
* Sensitive-query notices are cached on the short TTL too (the user might
|
||||
* retry from a different angle quickly), and `undefined` provider (chain
|
||||
|
|
|
|||
|
|
@ -9,35 +9,43 @@ export function createHealthRoutes(config: Config, chain: ProviderChain) {
|
|||
app.get('/', (c) => c.json({ status: 'ok', service: 'mana-geocoding' }));
|
||||
|
||||
/**
|
||||
* Upstream Pelias health. Proxies a request to the Pelias API so
|
||||
* monitoring can reach it without `extra_hosts: host.docker.internal`
|
||||
* on the blackbox exporter.
|
||||
* Upstream photon-self health. Proxies a request to the self-hosted
|
||||
* Photon so monitoring can reach it without `extra_hosts:
|
||||
* host.docker.internal` on the blackbox exporter.
|
||||
*
|
||||
* Backwards-compatible: existing prometheus probes against this
|
||||
* endpoint keep working. Now reports `degraded` (200) instead of `down`
|
||||
* (503) when Pelias is unreachable but a fallback provider is healthy
|
||||
* — the system can still serve queries, just slower / less precise.
|
||||
* Reports `degraded` (200) instead of `down` (503) when photon-self is
|
||||
* unreachable but a public fallback (photon / nominatim) is healthy —
|
||||
* the system can still serve queries, just at the cost of leaking the
|
||||
* query content to a third party.
|
||||
*/
|
||||
app.get('/pelias', async (c) => {
|
||||
app.get('/photon-self', async (c) => {
|
||||
const upstream = config.photonSelf.apiUrl;
|
||||
if (!upstream) {
|
||||
return c.json({ status: 'unconfigured', error: 'PHOTON_SELF_API_URL is unset' }, 503);
|
||||
}
|
||||
try {
|
||||
const res = await fetch(`${config.pelias.apiUrl}/status`, {
|
||||
const res = await fetch(`${upstream}/api?q=Konstanz&limit=1`, {
|
||||
signal: AbortSignal.timeout(5000),
|
||||
});
|
||||
if (!res.ok && res.status !== 404) {
|
||||
if (!res.ok) {
|
||||
return c.json(
|
||||
{ status: 'degraded', upstream: res.status, fallbackAvailable: chainHasFallback(chain) },
|
||||
chainHasFallback(chain) ? 200 : 503
|
||||
{
|
||||
status: 'degraded',
|
||||
upstream: res.status,
|
||||
fallbackAvailable: chainHasPublicFallback(chain),
|
||||
},
|
||||
chainHasPublicFallback(chain) ? 200 : 503
|
||||
);
|
||||
}
|
||||
return c.json({ status: 'ok', upstream: 'pelias-api' });
|
||||
return c.json({ status: 'ok', upstream: 'photon-self' });
|
||||
} catch (e) {
|
||||
return c.json(
|
||||
{
|
||||
status: chainHasFallback(chain) ? 'degraded' : 'down',
|
||||
status: chainHasPublicFallback(chain) ? 'degraded' : 'down',
|
||||
error: e instanceof Error ? e.message : 'unknown',
|
||||
fallbackAvailable: chainHasFallback(chain),
|
||||
fallbackAvailable: chainHasPublicFallback(chain),
|
||||
},
|
||||
chainHasFallback(chain) ? 200 : 503
|
||||
chainHasPublicFallback(chain) ? 200 : 503
|
||||
);
|
||||
}
|
||||
});
|
||||
|
|
@ -56,10 +64,10 @@ export function createHealthRoutes(config: Config, chain: ProviderChain) {
|
|||
}
|
||||
|
||||
/**
|
||||
* Check if any non-Pelias provider is currently believed healthy. Used
|
||||
* to soften /pelias health to "degraded" instead of "down" when a
|
||||
* fallback can still serve traffic.
|
||||
* Check if any public fallback provider is currently believed healthy.
|
||||
* Used to soften /photon-self health to "degraded" instead of "down"
|
||||
* when a public fallback can still serve traffic.
|
||||
*/
|
||||
function chainHasFallback(chain: ProviderChain): boolean {
|
||||
return chain.getHealthSnapshot().some((p) => p.name !== 'pelias' && p.healthy);
|
||||
function chainHasPublicFallback(chain: ProviderChain): boolean {
|
||||
return chain.getHealthSnapshot().some((p) => p.name !== 'photon-self' && p.healthy);
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue