chore(geocoding): remove Pelias + close 3 bypass paths to public Nominatim

Pelias was retired from the Mac mini on 2026-04-28; photon-self
(self-hosted Photon on mana-gpu) has been the live primary since then.
This removes the now-dead Pelias adapter, config, tests, and the
services/mana-geocoding/pelias/ stack — the entire compose file, the
geojsonify_place_details.js patch, the setup.sh import script.

Provider chain is now `photon-self → photon → nominatim`. The chain
keeps its `privacy: 'local' | 'public'` split, sensitive-query
blocking, coord quantization, and aggressive caching unchanged.

Three direct calls to nominatim.openstreetmap.org that bypassed
mana-geocoding now route through the wrapper:

- citycorners/add-city + citycorners/cities/[slug]/add use the shared
  searchAddress() client (browser → same-origin proxy → mana-geocoding
  → photon-self).
- memoro mobile drops its OSM reverse-geocoding fallback entirely;
  Expo's on-device reverse-geocoding stays as the sole path. Routing
  through the wrapper would require a memoro-server proxy endpoint —
  a follow-up if Expo's quality proves insufficient.

Other behavioral changes:

- CACHE_PUBLIC_TTL_MS dropped from 7d → 1h. The long TTL was a
  privacy-amplification trick from the Pelias era; with photon-self
  serving the bulk of traffic, a transient cross-LAN blip was pinning
  cached fallback answers for days. 1h gives quick recovery.
- /health/pelias renamed to /health/photon-self; prometheus blackbox
  config + status-page generator updated.
- mana-geocoding container no longer needs `extra_hosts:
  host.docker.internal:host-gateway` (was only there for the
  Pelias-on-host-network era).

113 tests passing. CLAUDE.md rewritten to reflect the post-Pelias
architecture.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Till JS 2026-04-28 22:12:26 +02:00
parent 7bca16dfa7
commit 2bbcf14aba
35 changed files with 330 additions and 1262 deletions

View file

@ -1,9 +1,11 @@
/**
* Shared geocoding client for all modules in the unified Mana app.
*
* Talks to our self-hosted mana-geocoding service (Pelias-backed, port 3018).
* All queries stay within our infrastructure — no user location data leaves
* the network.
* Talks to mana-geocoding (port 3018), which fronts a provider chain
* (photon-self → public photon → public nominatim) with sensitive-query
* blocking and coord quantization. Sensitive + happy-path queries stay
* on our infrastructure via photon-self; only last-resort fallbacks
* leave the network.
*
* Used by: places, events, contacts, photos,
*
@ -66,26 +68,24 @@ export interface GeocodingResult {
longitude: number;
address: GeocodingAddress;
category: PlaceCategory;
/** Raw Pelias categories (food, retail, transport, …) — only present
* when the result came from Pelias. */
peliasCategories?: string[];
confidence: number;
/** Which backend served this result. `pelias` is local; `photon` and
* `nominatim` are public APIs (the wrapper applies sensitive-query
* blocking + coord quantization before forwarding to those). */
provider?: 'pelias' | 'photon' | 'nominatim';
/** Which backend served this result. `photon-self` is our self-hosted
* Photon (privacy: 'local'); `photon` and `nominatim` are public APIs
* (the wrapper applies sensitive-query blocking + coord quantization
* before forwarding to those). */
provider?: 'photon-self' | 'photon' | 'nominatim';
}
/**
* Out-of-band information returned alongside results — the wrapper uses
* this to signal *why* a query had unusual behavior:
*
* - `'fallback_used'`: Pelias was unreachable, so a public-API provider
* served the request. Results are still valid but may be less precise.
* UI should show a subtle "approximate" badge.
* - `'fallback_used'`: photon-self was unreachable, so a public-API
* provider served the request. Results are still valid but may be
* less precise. UI should show a subtle "approximate" badge.
* - `'sensitive_local_unavailable'`: the query matched the wrapper's
* sensitive-keyword list (medical / mental-health / crisis service)
* AND the local Pelias was unreachable. The wrapper deliberately did
* AND no local provider was reachable. The wrapper deliberately did
* NOT forward the query to public APIs. Results are empty by design.
* UI should explain this to the user.
*/
@ -95,7 +95,7 @@ interface GeocodingResponse {
results: GeocodingResult[];
cached?: boolean;
error?: string;
provider?: 'pelias' | 'photon' | 'nominatim';
provider?: 'photon-self' | 'photon' | 'nominatim';
notice?: GeocodingNotice;
}
@ -109,7 +109,7 @@ interface GeocodingResponse {
*/
export interface SearchOutcome {
results: GeocodingResult[];
provider?: 'pelias' | 'photon' | 'nominatim';
provider?: 'photon-self' | 'photon' | 'nominatim';
notice?: GeocodingNotice;
}

View file

@ -6,6 +6,7 @@
import { cityTable, useAllCities } from '$lib/modules/citycorners';
import type { LocalCity } from '$lib/modules/citycorners/types';
import { RoutePage } from '$lib/components/shell';
import { searchAddress } from '$lib/geocoding';
const allCities = useAllCities();
@ -44,14 +45,10 @@
geocoding = true;
try {
const searchQ = country.trim() ? `${q}, ${country.trim()}` : q;
const res = await fetch(
`https://nominatim.openstreetmap.org/search?format=json&q=${encodeURIComponent(searchQ)}&limit=1`,
{ headers: { 'User-Agent': 'CityCorners/1.0' } }
);
const results = await res.json();
const results = await searchAddress(searchQ, { limit: 1 });
if (results.length > 0) {
latitude = parseFloat(results[0].lat);
longitude = parseFloat(results[0].lon);
latitude = results[0].latitude;
longitude = results[0].longitude;
}
} catch {
// best-effort

View file

@ -8,6 +8,7 @@
import { ccLocationTable, CATEGORY_KEYS } from '$lib/modules/citycorners';
import type { LocalCity, LocalLocation } from '$lib/modules/citycorners/types';
import { RoutePage } from '$lib/components/shell';
import { searchAddress } from '$lib/geocoding';
const cityCtx = getContext<{ value: LocalCity | undefined }>('currentCity');
let city = $derived(cityCtx.value);
@ -58,14 +59,10 @@
cityName && !addr.toLowerCase().includes(cityName.toLowerCase())
? `${addr}, ${cityName}`
: addr;
const res = await fetch(
`https://nominatim.openstreetmap.org/search?format=json&q=${encodeURIComponent(q)}&limit=1`,
{ headers: { 'User-Agent': 'CityCorners/1.0' } }
);
const results = await res.json();
const results = await searchAddress(q, { limit: 1 });
if (results.length > 0) {
latitude = parseFloat(results[0].lat);
longitude = parseFloat(results[0].lon);
latitude = results[0].latitude;
longitude = results[0].longitude;
}
} catch {
// Geocoding is best-effort

View file

@ -25,8 +25,8 @@
* If we ever want to rate-limit by user we can add JWT verification here
* without touching the upstream service.
*
* Also proxies /health and /health/pelias so the SvelteKit status page
* (/status) can check the service from its server side.
* Also proxies /health and /health/photon-self so the SvelteKit status
* page (/status) can check the service from its server side.
*/
import { error } from '@sveltejs/kit';

View file

@ -155,71 +155,26 @@ export const reverseGeocodeWithExpo = async (
};
/**
* Führt ein Reverse Geocoding mit OpenStreetMap/Nominatim durch
* @param latitude Breitengrad
* @param longitude Längengrad
* @returns Adressinformationen oder null bei Fehler
*/
export const reverseGeocodeWithOSM = async (
latitude: number,
longitude: number
): Promise<AddressInfo | null> => {
try {
const url = `https://nominatim.openstreetmap.org/reverse?format=json&lat=${latitude}&lon=${longitude}&addressdetails=1`;
const response = await fetch(url, {
headers: {
'User-Agent': 'Memoro App', // OSM erfordert einen User-Agent
},
});
if (!response.ok) {
throw new Error(`OSM API responded with status: ${response.status}`);
}
const data = await response.json();
if (data && data.address) {
return {
street: data.address.road || data.address.pedestrian || data.address.street,
streetNumber: data.address.house_number,
postalCode: data.address.postcode,
city: data.address.city || data.address.town || data.address.village,
district: data.address.suburb || data.address.neighbourhood,
region: data.address.state,
country: data.address.country,
name: data.name,
formattedAddress: data.display_name,
};
}
return null;
} catch (error) {
console.debug('Fehler beim Reverse Geocoding mit OSM:', error);
return null;
}
};
/**
* Führt ein Reverse Geocoding durch und versucht, die beste verfügbare Adresse zu ermitteln
* @param latitude Breitengrad
* @param longitude Längengrad
* @returns Adressinformationen oder null bei Fehler
* Führt ein Reverse Geocoding durch. Nutzt ausschließlich Expo's
* On-Device Reverse-Geocoding — keine direkten Calls an
* nominatim.openstreetmap.org, weil das die User-IP + Coords ungeschützt
* an einen Public-Service leakt. Wenn Expo keine Adresse liefert,
* geben wir null zurück.
*
* Falls Expo's Qualität auf Dauer nicht reicht, ist der richtige Fix
* ein Proxy-Endpoint im memoro-server, der intern an mana-geocoding
* weiterreicht (Privacy-Hardening + Photon-Self).
*/
export const getAddressFromCoordinates = async (
latitude: number,
longitude: number
): Promise<AddressInfo | null> => {
try {
// Zuerst mit Expo versuchen
const expoResult = await reverseGeocodeWithExpo(latitude, longitude);
// Wenn Expo ein gutes Ergebnis liefert, dieses verwenden
if (expoResult && expoResult.street && expoResult.city) {
return expoResult;
}
// Ansonsten mit OSM versuchen
return await reverseGeocodeWithOSM(latitude, longitude);
return expoResult;
} catch (error) {
console.debug('Fehler beim Reverse Geocoding:', error);
return null;

View file

@ -491,10 +491,9 @@ services:
- "traefik.http.services.mana-events.loadbalancer.server.port=3065"
# ─── Geocoding ───────────────────────────────────────────
# Thin Hono wrapper in front of a self-hosted Pelias stack.
# Pelias itself (elasticsearch + api + libpostal) runs from a separate
# compose file in services/mana-geocoding/pelias/ — see
# services/mana-geocoding/CLAUDE.md for the initial import procedure.
# Thin Hono wrapper in front of self-hosted Photon (mana-gpu) with
# public photon.komoot.io and Nominatim as last-resort fallbacks.
# See services/mana-geocoding/CLAUDE.md for provider-chain details.
# Internal-only: no traefik labels, not exposed via Cloudflare.
mana-geocoding:
build:
@ -504,14 +503,9 @@ services:
container_name: mana-geocoding
restart: always
mem_limit: 128m
# Pelias runs on host network via its own compose, so the wrapper
# reaches it via host.docker.internal (Pelias API at :4000).
extra_hosts:
- "host.docker.internal:host-gateway"
environment:
TZ: Europe/Berlin
PORT: 3018
PELIAS_API_URL: http://host.docker.internal:4000/v1
# Self-hosted Photon on mana-gpu (cross-LAN). When set, registers
# as `photon-self` provider with privacy: 'local' — eligible for
# sensitive queries. Empty value = slot disabled.

View file

@ -314,9 +314,9 @@ scrape_configs:
- targets:
# mana-geocoding's own health (Hono wrapper)
- http://mana-geocoding:3018/health
# Upstream Pelias health, proxied through the wrapper so the
# Upstream photon-self health, proxied through the wrapper so the
# blackbox-exporter doesn't need host.docker.internal access.
- http://mana-geocoding:3018/health/pelias
- http://mana-geocoding:3018/health/photon-self
# mana-ai (Mission Runner) — internal-only, no CF tunnel.
- http://mana-ai:3066/health
relabel_configs:

View file

@ -69,8 +69,8 @@ friendly_name() {
name="${name#http://}"
# Interne Services (Docker-Netz): mana-geocoding:3018/health → Mana Geocoding
case "$name" in
mana-geocoding:*/health/pelias)
name="Pelias (via Geocoding)"
mana-geocoding:*/health/photon-self)
name="Photon-Self (via Geocoding)"
;;
mana-geocoding:*)
name="Mana Geocoding"

View file

@ -1,6 +1,12 @@
# mana-geocoding
Geocoding service for the Places module. **Provider-chain architecture** — tries a self-hosted Pelias first, falls back to public Photon (komoot) and then public Nominatim (OSM) when Pelias is unhealthy or unreachable. All Pelias-served queries stay on our infrastructure; fallback queries leak the search string to a public OSM endpoint.
Geocoding service for the Places module and other map-aware modules.
**Provider-chain architecture** — tries self-hosted Photon (`photon-self`,
on mana-gpu) first, falls back to public Photon (komoot) and then public
Nominatim (OSM) when photon-self is unhealthy. All photon-self queries
stay on our infrastructure; fallback queries leak the search string to a
public OSM endpoint, with sensitive-query blocking + coord quantization
+ aggressive caching as privacy mitigations.
## Tech Stack
@ -8,51 +14,51 @@ Geocoding service for the Places module. **Provider-chain architecture** — tri
|-------|------------|
| **Runtime** | Bun |
| **Framework** | Hono |
| **Primary geocoder** | Pelias (self-hosted, Elasticsearch-backed) |
| **Primary geocoder** | Self-hosted Photon (`photon-self`, on mana-gpu via WSL2) |
| **Fallback 1** | [Photon](https://photon.komoot.io) (public, no rate limit advertised) |
| **Fallback 2** | [Nominatim](https://nominatim.openstreetmap.org) (public, 1 req/sec strict) |
| **Data** | OpenStreetMap DACH extract (DE/AT/CH) for Pelias; global OSM for the public fallbacks |
| **Caching** | In-memory LRU (5000 entries, 24h TTL) — applies to all provider answers |
| **Data** | Photon-Europe pre-built index (Java JAR + embedded OpenSearch) |
| **Caching** | In-memory LRU (5000 entries; 24h for `photon-self`, 1h for public answers) |
## Port: 3018
## Pelias has been retired
Pelias was the original primary backend (DACH OSM index, Elasticsearch +
libpostal). It was stopped on 2026-04-28 because it ate ~3.2 GB RAM on
the Mac mini and was crushing the host into 8.6 GB swap. The provider
adapter, the JSON config patch hacks, and the entire `pelias/` stack
were removed from this repo on the same day. See
[`docs/reports/geocoding-self-hosting-2026-04-28.md`](../../docs/reports/geocoding-self-hosting-2026-04-28.md)
for the decision rationale and the migration log with WSL2 gotchas.
## Quick Start
```bash
# 1. Start Pelias stack (first time: run setup.sh for data import)
cd services/mana-geocoding/pelias
docker compose up -d
# First time only:
chmod +x setup.sh && ./setup.sh
# 2. Start the Hono wrapper
cd services/mana-geocoding
bun run dev
```
The wrapper boots with no upstream of its own (it's a thin proxy in
front of `photon-self` + public providers). For a real local-dev hit
against `photon-self`, set `PHOTON_SELF_API_URL` to the GPU server
(e.g. `http://192.168.178.11:2322`); otherwise the chain runs on the
public providers only.
## API Endpoints
All endpoints are public (no auth required) — the service is internal-only, not exposed to the internet.
All endpoints are public (no auth required) — the service is internal-only,
not exposed to the internet. The web app reaches it via a same-origin
proxy at `apps/mana/apps/web/src/routes/api/v1/geocode/[...path]/+server.ts`.
| Method | Path | Description |
|--------|------|-------------|
| GET | `/api/v1/geocode/search?q=...` | Forward geocoding / autocomplete |
| GET | `/api/v1/geocode/reverse?lat=...&lon=...` | Reverse geocoding |
| GET | `/api/v1/geocode/stats` | Cache statistics |
| GET | `/api/v1/geocode/stats` | Cache statistics + provider snapshot |
| GET | `/health` | Wrapper health |
| GET | `/health/pelias` | Upstream Pelias health (used by blackbox monitoring) |
### Forward-search strategy
The wrapper queries Pelias `/autocomplete` first (fast, fuzzy, optimised for
venue names like "Konzil Restaurant"). If that returns zero features, it
falls back to `/search`, which covers the address layer that autocomplete
deliberately excludes as a performance optimisation.
This gives the best of both worlds: quick venue matches for free-text
queries AND reliable results for street-style queries like "Marktstätte
Konstanz". See `src/routes/geocode.ts` — the fallback is baked into the
forward handler.
| GET | `/health/photon-self` | Upstream `photon-self` health (used by blackbox monitoring) |
| GET | `/health/providers` | Per-provider health snapshot |
### Search params
@ -78,7 +84,7 @@ forward handler.
{
"results": [
{
"label": "Münster Café, Münsterplatz 3, 78462 Konstanz",
"label": "Münster Café, Münsterplatz 3, 78462 Konstanz, Deutschland",
"name": "Münster Café",
"latitude": 47.663,
"longitude": 9.175,
@ -87,75 +93,53 @@ forward handler.
"houseNumber": "3",
"postalCode": "78462",
"city": "Konstanz",
"country": "Germany"
"state": "Baden-Württemberg",
"country": "Deutschland"
},
"category": "food",
"peliasCategories": ["food", "retail", "nightlife"],
"confidence": 0.95
"confidence": 0.78,
"provider": "photon-self"
}
]
],
"provider": "photon-self",
"tried": ["photon-self"]
}
```
The response body includes `provider: 'photon-self' | 'photon' | 'nominatim'`,
`tried: ProviderName[]`, and an optional `notice`
(`'fallback_used'` or `'sensitive_local_unavailable'`) so the caller can
render an "approximate match" hint or explain why a sensitive query
returned 0 results.
## Category Mapping
Pelias' OSM importer tags each venue with its own taxonomy (`food`, `retail`,
`transport`, `health`, `education`, …). We collapse those into the 7
PlaceCategories used by the Places module, using a **priority-ordered list**
so the most specific signal wins:
Photon and Nominatim emit raw OSM tags (`amenity:restaurant`,
`shop:supermarket`, `public_transport:station`, …) which we collapse
into the 7 PlaceCategories used by the Places module. Mapping logic in
`src/lib/osm-category-map.ts` — priority-ordered so the most specific
signal wins (e.g. `amenity:restaurant` → `food` even if also tagged as
`shop`).
| PlaceCategory | Wins if Pelias categories contain |
|---------------|-----------------------------------|
| `food` | `food` (beats retail/nightlife — a restaurant is food) |
| `transit` | `transport`, `transport:public`, `transport:air`, `transport:bus`, `transport:taxi`, `transport:sea` |
| `shopping` | `retail` (when no `food` present) |
| `leisure` | `entertainment`, `nightlife`, `recreation` |
| `work` | `education`, `professional`, `government`, `finance` |
| `other` | `health`, `religion`, everything else |
| PlaceCategory | Wins for tags |
|---------------|---------------|
| `food` | `amenity:restaurant`, `amenity:cafe`, `amenity:fast_food`, `amenity:bar`, `amenity:pub`, `amenity:bakery` |
| `transit` | `amenity:bus_station`, `public_transport:station`, `railway:station`, `aeroway:terminal`, `amenity:car_rental` |
| `shopping` | `shop` (any value) |
| `leisure` | `leisure` (most), `tourism:attraction`, `amenity:cinema`, `amenity:theatre` |
| `work` | `office`, `amenity:bank`, `amenity:townhall`, `amenity:embassy`, `amenity:school`, `amenity:university` |
| `other` | health (`amenity:hospital`, `amenity:clinic`, `healthcare:*`), religion (`amenity:place_of_worship`), addresses, fall-through |
| `home` | (not auto-detected — set manually by the user) |
**Example mappings verified on the DACH index:**
| OSM venue | Pelias categories | → PlaceCategory |
|-----------|-------------------|-----------------|
| Konzil Konstanz Restaurant | `[food, retail, nightlife]` | `food` |
| Bahnhof Konstanz | `[transport, transport:station]` | `transit` |
| Physiotherapie-Schule | `[education]` | `work` |
| MX-Park (Rennstrecke) | `[recreation]` | `leisure` |
The priority list lives in `src/lib/category-map.ts` — update it if you want
a Pelias category to map somewhere else.
### Critical: the Pelias API patch
By default, Pelias **hides** the `category` field from API responses unless
the caller explicitly passes `?categories=...` — a quirk intended for keyword
filtering that also strips category metadata from normal address queries. We
work around this by mounting a **patched copy** of
`helper/geojsonify_place_details.js` over the upstream one in the `pelias-api`
container (`pelias/geojsonify_place_details.js`). The patch changes
`condition: checkCategoryParam` → `condition: () => true` so the category
array always flows through to the wrapper.
If you bump the `pelias/api` image, regenerate the patched file:
```bash
cd services/mana-geocoding/pelias
docker run --rm pelias/api:latest cat /code/pelias/api/helper/geojsonify_place_details.js \
| sed 's|condition: checkCategoryParam|condition: () => true|' \
> geojsonify_place_details.js
docker compose up -d --force-recreate api
```
## Configuration
```env
PORT=3018
# --- Provider chain (tried in order) ----------------------------------
# Default order: photon-self,pelias,photon,nominatim
# Default order: photon-self,photon,nominatim
# `photon-self` is silently dropped if PHOTON_SELF_API_URL is unset.
GEOCODING_PROVIDERS=photon-self,pelias,photon,nominatim
GEOCODING_PROVIDERS=photon-self,photon,nominatim
PROVIDER_TIMEOUT_MS=8000 # per-provider request timeout (cold-start safe)
PROVIDER_HEALTH_CACHE_MS=30000 # health-cache TTL — skip dead providers
@ -165,13 +149,10 @@ PROVIDER_HEALTH_CACHE_MS=30000 # health-cache TTL — skip dead providers
# Set in .env.macmini; flow into the container via docker-compose env.
PHOTON_SELF_API_URL=http://192.168.178.11:2322
# --- Pelias (legacy, currently stopped — privacy: 'local') ------------
PELIAS_API_URL=http://pelias-api:4000/v1
# --- Public Photon (privacy: 'public', last-resort fallback) ----------
PHOTON_API_URL=https://photon.komoot.io
# --- Nominatim (fallback 2) -------------------------------------------
# --- Nominatim (last-resort fallback) ---------------------------------
NOMINATIM_API_URL=https://nominatim.openstreetmap.org
NOMINATIM_USER_AGENT=mana-geocoding/1.0 (+https://mana.how; kontakt@memoro.ai)
NOMINATIM_INTERVAL_MS=1100 # >= 1000 to honor 1 req/sec policy
@ -180,7 +161,9 @@ NOMINATIM_INTERVAL_MS=1100 # >= 1000 to honor 1 req/sec policy
CORS_ORIGINS=http://localhost:5173,https://mana.how
CACHE_MAX_ENTRIES=5000
CACHE_TTL_MS=86400000 # 24h — used for local-provider answers
CACHE_PUBLIC_TTL_MS=604800000 # 7d — extended TTL for public-API answers (privacy)
CACHE_PUBLIC_TTL_MS=3600000 # 1h — short TTL for public-API answers so a
# transient photon-self blip doesn't pin
# stale fallback answers in cache for days.
```
To **disable a provider**, drop it from `GEOCODING_PROVIDERS`. To run with
@ -195,9 +178,7 @@ The dual-Photon split:
fallback for non-sensitive queries when self-hosted is down.
Both share the same `PhotonProvider` class — only the URL, name, and
privacy stance differ. See the [migration runbook](../../docs/runbooks/photon-on-mana-gpu.md)
and [decision report](../../docs/reports/geocoding-self-hosting-2026-04-28.md)
for the operational story.
privacy stance differ.
## Provider-chain semantics
@ -213,40 +194,44 @@ skips it for the rest of the cache window. The next request after the cache
expires re-probes lazily — there is no background health pinger.
```
Client (Places module)
Client (Places module, etc.)
→ mana-geocoding (Hono, port 3018)
→ LRU cache (24h TTL) ← hit: ~0 ms
→ LRU cache (24h local / 1h public) ← hit: ~0 ms
→ Provider chain
1. Pelias ← reachable: 50–200 ms (DACH index, fully featured)
2. Photon ← fallback: 200–500 ms public, partial features
3. Nominatim ← last resort: 200–800 ms + 1 req/sec queue
1. photon-self ← reachable: 50–200 ms (cross-LAN to mana-gpu)
2. photon ← public fallback: 200–500 ms
3. nominatim ← last resort: 200–800 ms + 1 req/sec queue
```
The response body includes `provider: 'pelias' | 'photon' | 'nominatim'`,
`tried: ProviderName[]`, and an optional `notice` (`'fallback_used'` or
`'sensitive_local_unavailable'`) so the caller can render an
"approximate match" hint or explain why a sensitive query returned 0
results.
### Why the public TTL is short (1h)
When photon-self has a transient cross-LAN blip and a request falls
through to public photon, the public answer used to be cached for 7 days
— pinning the cached fallback even after photon-self recovered. With
the 1h TTL the chain returns to photon-self within an hour. The privacy
benefit of long TTLs (fewer outbound queries) is moot now that
photon-self serves the bulk of traffic; only fallback answers go through
public providers.
## Privacy hardening
When a request goes to Pelias, the user's query content + focus point
stay on our infrastructure. When it falls through to Photon or
Nominatim, the query is forwarded to a third party. Three independent
defenses limit what those third parties can learn:
When a request goes to `photon-self`, the user's query content + focus
point stay on our infrastructure. When it falls through to public
Photon or Nominatim, the query is forwarded to a third party. Three
independent defenses limit what those third parties can learn:
### 1. Sensitive-query block (`src/lib/sensitive-query.ts`)
Queries matching the medical / mental-health / crisis-service keyword
list (`Hausarzt`, `Psychiater`, `Klinikum`, `Suchtberatung`, `HIV`,
`Frauenhaus`, …) are **never forwarded to public APIs**, even if Pelias
is unreachable. The chain detects sensitivity at the route layer and
calls `chain.search(req, signal, { localOnly: true })` — providers with
`privacy: 'public'` are filtered out *before* the iteration begins, so
there is no race window.
`Frauenhaus`, …) are **never forwarded to public APIs**, even if
photon-self is unreachable. The chain detects sensitivity at the route
layer and calls `chain.search(req, signal, { localOnly: true })` —
providers with `privacy: 'public'` are filtered out *before* the
iteration begins, so there is no race window.
When no local provider is available (e.g. Pelias is stopped), a
sensitive query returns `ok: true, results: [], notice:
When no local provider is available (e.g. `PHOTON_SELF_API_URL` is
unset), a sensitive query returns `ok: true, results: [], notice:
'sensitive_local_unavailable'`. The UI should show "Diese Suche bleibt
bewusst lokal — kein Treffer im DACH-Index. Versuche eine allgemeinere
Formulierung." rather than "no results".
@ -267,17 +252,18 @@ Coordinates are rounded before forwarding to public providers:
City-block resolution — sufficient for "what's near me?", avoids
logging exact home/workplace coordinates to a third party.
Pelias always gets full-precision coordinates — quantization only
applies on the way out to public APIs.
`photon-self` always gets full-precision coordinates — quantization
only applies on the way out to public APIs.
### 3. Aggressive caching of public-API answers
### 3. Caching of public-API answers
`config.cache.publicTtlMs` (default 7 days) overrides the default 24h
cache TTL when the response came from a public provider. Same query
from 1000 different users → 1 outbound request to Photon/Nominatim.
This is the strongest privacy lever we have over public providers,
since we can't change their logging behavior — only the rate at which
we feed them queries.
`config.cache.publicTtlMs` (default 1h) overrides the default 24h cache
TTL when the response came from a public provider. Same query from
multiple users within an hour → 1 outbound request to Photon/Nominatim.
The TTL is short by design (see "Why the public TTL is short" above) —
the strong caching lever was an artifact of the era when public Photon
was THE fallback for a stopped Pelias; today it's a last-resort fallback
behind a healthy photon-self.
### What this protects + what it doesn't
@ -286,8 +272,8 @@ we feed them queries.
| Public API sees user's IP | ✓ (wrapper is the proxy, only mac-mini IP goes out) |
| Public API sees user identity / JWT | ✓ (wrapper sends no auth headers) |
| Public API sees query content | partial — sensitive queries blocked entirely, others go through |
| Public API sees user's exact GPS | ✓ (quantized to ~1km / ~110m) |
| Aggregate location-intent profiling | partial — cache reduces volume ~10–100× |
| Public API sees user's exact GPS | ✓ (quantized to ~1 km / ~110 m) |
| Aggregate location-intent profiling | partial — cache reduces volume modestly |
| TLS-level traffic analysis (timing) | ✗ (not in scope) |
| Compelled disclosure of public-API logs | ✗ (no legal mitigation) |
@ -295,89 +281,42 @@ Residual risk for non-sensitive queries: "third party learns what
queries our backend made, with timestamps, but not who made them."
Acceptable for restaurant/landmark lookups, blocked for medical lookups.
## Pelias Infrastructure
## photon-self infrastructure
The Pelias stack runs as a separate docker-compose in `pelias/`:
Photon runs on **mana-gpu** (Windows 11 + WSL2 + Docker), as a Java JAR
inside `eclipse-temurin:21-jre` with the unpacked Photon-Europe data
directory (~80 GB) mounted in. Cross-LAN reachable from the Mac mini via
WSL2 mirrored networking on `192.168.178.11:2322`.
- **elasticsearch** — Index storage (Docker volume, ~5GB for DACH after
indexing 13.4M OSM objects — 10M addresses + 3.3M venues)
- **api** — HTTP API (port 4000), patched for category passthrough
- **libpostal** — Address parsing (internal only, not exposed on host port
because 4400 collides with mana-infra-landings on the Mac Mini)
- **Import containers** — Run once for initial data load, then stopped
Operator scripts for the weekly DB refresh live in
`services/mana-geocoding/photon-self/`:
**Production RAM usage** (measured on the Mac Mini after the 2026-04-11 deploy):
| File | Purpose |
|------|---------|
| `photon-update.sh` | Atomic-swap update script — downloads new tarball, unpacks, restarts the container, rolls back on failure. Installed on mana-gpu at `/usr/local/bin/photon-update.sh`. |
| `photon-update.service` | systemd oneshot unit that runs `photon-update.sh`. |
| `photon-update.timer` | systemd timer (Sun 03:30 + 30min jitter, `Persistent=true`). |
| `README.md` | Re-installation steps for DR scenarios + manual test commands. |
| Container | RAM |
|---|---|
| pelias-elasticsearch | ~1.2 GB |
| pelias-libpostal | ~1.9 GB (address parser model) |
| pelias-api | ~100 MB |
| mana-geocoding (wrapper) | ~20–60 MB |
Total: **~3.2 GB** — larger than the initial ~1.5 GB estimate because
libpostal loads its full address parser into memory up front.
### Initial import (one-time)
The DACH PBF extract is ~5GB and takes 30-45 minutes to index. See
`pelias/setup.sh` for the full pipeline. Key steps, in order:
1. `docker compose up -d` — bring up ES, api, libpostal
2. `docker exec pelias-elasticsearch elasticsearch-plugin install analysis-icu`
then restart — the official ES image doesn't ship `analysis-icu` which
Pelias' schema mapping requires
3. `docker compose --profile import run --rm schema ./bin/create_index`
4. `docker compose --profile import run --rm openstreetmap ./bin/download`
(downloads `dach-latest.osm.pbf` from Geofabrik, ~5GB)
5. **Rename** `dach-latest.osm.pbf` → `planet-latest.osm.pbf` inside the
pelias-data volume (Pelias' importer expects that filename). The
`pelias.json` config references it as `planet-latest.osm.pbf` too.
6. `docker compose --profile import run --rm openstreetmap ./bin/start`
(22M objects, ~30 min on an M2 Mac mini)
### pelias.json gotchas
A few non-obvious settings required for a self-hosted DACH deployment:
- **`adminLookup.enabled: false`** — Pelias tries to resolve country/region
hierarchies via "Who's On First" data by default. We don't import WOF,
so this must be disabled or import crashes with `unable to locate sqlite
folder`.
- **`leveldbpath: "/data/leveldb"`** — not `/tmp/leveldb`; the container
user (1001) needs write access and `/tmp` is not mounted.
- **`api.services.libpostal: { url: "..." }`** — must be an object, not a
string. The API's Joi schema rejects the string form.
- **Only declare services you actually run.** We used to list `placeholder`,
`pip`, and `interpolation` in `api.services` but never ran the containers;
Pelias logged `ENOTFOUND` errors on every query. Dropping the unused
entries makes Pelias degrade cleanly to libpostal-only parsing (warns
`service disabled` once at startup, then silent).
- **No `defaultParameters.boundary.country`** — Pelias only accepts a
single country value for `boundary.country`. Since our index only
contains DACH data anyway, we drop the filter entirely.
- **`features: { filename: "planet-latest.osm.pbf" }`** — required because
Geofabrik downloads come named `dach-latest.osm.pbf`, but Pelias'
openstreetmap importer looks for `planet-latest.osm.pbf` by default.
The migration log + 5 WSL2 gotchas are documented in
[`docs/reports/geocoding-self-hosting-2026-04-28.md`](../../docs/reports/geocoding-self-hosting-2026-04-28.md).
### Wrapper gotchas
- **`idleTimeout: 60`** on `Bun.serve` — the default 10 s cuts off cold
queries that hit Elasticsearch and libpostal in sequence. 60 s is
generous for the worst case while still catching actually-stuck
connections.
- **Colima bind-mount cache.** The mac-mini bind-mounts this repo's files
into several monitoring containers. Colima on macOS sometimes serves a
stale view of a bind-mounted file even after the file on disk changes.
After editing `scripts/generate-status-page.sh` (also bind-mounted into
`mana-status-gen`), restart the consuming container so it sees the
fresh content: `docker restart mana-status-gen`.
- **`host.docker.internal` doesn't resolve from blackbox-exporter** on
Colima, so the external monitoring can't probe pelias-api or
elasticsearch directly. Instead, the wrapper exposes `/health/pelias`
which proxies a request to Pelias; Prometheus probes that internal
endpoint inside the docker network. See `prometheus.yml` job
`blackbox-internal`.
cross-LAN queries to photon-self where OpenSearch needs to recover
shards. 60 s is generous for the worst case while still catching
actually-stuck connections.
- **Cross-LAN reach is occasionally flaky.** A photon-self request
sometimes hangs for the full `PROVIDER_TIMEOUT_MS` (8 s default), which
marks the provider unhealthy for 30 s. During that window, requests
fall through to public photon. With `CACHE_PUBLIC_TTL_MS=3600000` (1h),
the cached public answers expire fast enough that the chain returns to
photon-self once it's healthy again.
- **`host.docker.internal` is no longer needed.** The Pelias era used
`extra_hosts: host.docker.internal:host-gateway` to reach Pelias on
the host network. photon-self is reached over LAN by IP, so the
docker-compose entry no longer carries `extra_hosts`.
## Testing
@ -392,28 +331,27 @@ cd services/mana-geocoding
bun test
```
- `src/lib/__tests__/category-map.test.ts` — Pelias→PlaceCategory
priority resolution.
- `src/lib/__tests__/osm-category-map.test.ts` — raw OSM-tag→PlaceCategory
mapping used by Photon + Nominatim (since they emit `class:type` rather
than Pelias's curated taxonomy).
- `src/lib/__tests__/osm-category-map.test.ts` — raw OSM-tag →
PlaceCategory mapping (used by Photon + Nominatim).
- `src/lib/__tests__/cache.test.ts` — LRU eviction order, TTL expiry,
move-to-end on `get`, size tracking.
- `src/lib/__tests__/rate-limiter.test.ts` — single-token rate limiter
(used to enforce Nominatim's 1 req/sec policy). FIFO order, abort
cleanup, busy-flag release on aborted interval-wait.
- `src/providers/__tests__/chain.test.ts` — provider chain failover, health
cache, "stop on empty results" semantics.
- `src/lib/__tests__/privacy.test.ts` — coordinate quantization edge
cases.
- `src/lib/__tests__/sensitive-query.test.ts` — keyword-list coverage.
- `src/providers/__tests__/chain.test.ts` — provider chain failover,
health cache, "stop on empty results" semantics, localOnly mode.
- `src/providers/__tests__/photon-normalizer.test.ts` and
`nominatim-normalizer.test.ts` — locking the wire-format mapping for the
two public fallback providers.
As of the 2026-04-28 privacy-hardening rollout: **141 tests, all green**.
`nominatim-normalizer.test.ts` — wire-format mapping for the two
public providers.
- `src/__tests__/app.test.ts` — `createChain()` registration tests
(photon-self opt-in via env-var, chain order honored).
### Smoke test (`bun run test:smoke`)
End-to-end curls against a running service. Requires a fully deployed
Pelias stack with the DACH index loaded — run this after a deploy to
End-to-end curls against a running service. Run after a deploy to
confirm the full pipeline is healthy.
```bash
@ -422,9 +360,9 @@ bun run test:smoke # default http://localhost:3
./scripts/smoke-test.sh http://mana-geocoding:3018 # from another container
```
Asserts: wrapper + pelias health, restaurant→food, station→transit,
street+locality fallback returns results, focus biasing works, reverse
geocoding for Konstanz and München, cache hit on repeat. 9 checks.
Asserts: wrapper + photon-self health, restaurant→food category,
station→transit, street/locality fallback, focus biasing, reverse
geocoding for Konstanz and München, cache hit on repeat.
## Code Layout
@ -435,22 +373,22 @@ src/
├── config.ts # Environment config (incl. provider list)
├── routes/
│ ├── geocode.ts # Forward + reverse, delegates to chain
│ └── health.ts # /health, /health/pelias, /health/providers
│ └── health.ts # /health, /health/photon-self, /health/providers
├── providers/
│ ├── types.ts # GeocodingProvider interface, shared shape
│ ├── chain.ts # Failover orchestrator + health cache
│ ├── pelias.ts # Primary: self-hosted DACH Pelias
│ ├── photon.ts # Fallback 1: photon.komoot.io
│ └── nominatim.ts # Fallback 2: nominatim.openstreetmap.org
│ ├── photon.ts # photon-self + public photon (same class, two configs)
│ └── nominatim.ts # Public nominatim.openstreetmap.org
└── lib/
├── cache.ts # LRU cache with TTL + per-entry override
├── category-map.ts # Pelias-taxonomy → PlaceCategory
├── category-map.ts # PlaceCategory type definition
├── osm-category-map.ts # Raw OSM `class:type` → PlaceCategory
├── privacy.ts # Coordinate quantization for public APIs
├── rate-limiter.ts # Single-token limiter (used by Nominatim)
└── sensitive-query.ts # Health/crisis keyword detector
pelias/
├── docker-compose.yml # Pelias stack
├── pelias.json # Pelias config (DACH region)
└── setup.sh # Initial data import script
photon-self/ # Operator scripts for the mana-gpu Photon
├── photon-update.sh # Atomic-swap weekly update (deployed to mana-gpu)
├── photon-update.service # systemd oneshot unit
├── photon-update.timer # systemd weekly timer
└── README.md # Re-install steps for DR
```

View file

@ -1,121 +0,0 @@
# Pelias geocoding stack for mana-geocoding.
#
# Data pipeline: download → prepare → import → serve.
# See pelias/README.md for initial setup instructions.
#
# After import, `api`, `libpostal`, and `elasticsearch` must stay running
# (elasticsearch serves the queries). The import-profile containers defined
# below (schema, openstreetmap, polylines) only run during import and can
# be stopped afterward if RAM is tight.
services:
# --- Always running ---
api:
image: pelias/api:latest
container_name: pelias-api
restart: unless-stopped
ports:
- "4000:4000"
environment:
PORT: 4000
volumes:
- ./pelias.json:/code/pelias.json:ro
# Patch: always return the `category` field in API responses, not only
# when a `categories=...` filter is present. Pelias' default
# `checkCategoryParam` hides category from results unless the caller
# filters by it, but we want the OSM taxonomy (food, retail, transport, …)
# on every venue so our Places UI can auto-map it to a PlaceCategory.
# The patched file is generated from the upstream one with
# `sed "s|condition: checkCategoryParam|condition: () => true|"`
- ./geojsonify_place_details.js:/code/pelias/api/helper/geojsonify_place_details.js:ro
depends_on:
elasticsearch:
condition: service_healthy
networks:
- pelias
libpostal:
image: pelias/libpostal-service
container_name: pelias-libpostal
restart: unless-stopped
# No host port mapping — libpostal is an internal dependency of
# pelias-api, reached over the pelias network at libpostal:4400.
# Port 4400 on the host is used by mana-infra-landings (nginx for
# status.mana.how) on the production mac mini.
expose:
- "4400"
networks:
- pelias
elasticsearch:
image: docker.elastic.co/elasticsearch/elasticsearch:7.17.1
container_name: pelias-elasticsearch
restart: unless-stopped
ports:
- "9200:9200"
volumes:
- pelias-elasticsearch:/usr/share/elasticsearch/data
environment:
ES_JAVA_OPTS: "-Xms512m -Xmx512m"
discovery.type: single-node
xpack.security.enabled: "false"
ulimits:
memlock:
soft: -1
hard: -1
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:9200/_cluster/health"]
interval: 10s
timeout: 5s
retries: 30
networks:
- pelias
# --- Import pipeline (run once, then stop) ---
schema:
image: pelias/schema:latest
container_name: pelias-schema
volumes:
- ./pelias.json:/code/pelias.json:ro
depends_on:
elasticsearch:
condition: service_healthy
networks:
- pelias
profiles: ["import"]
openstreetmap:
image: pelias/openstreetmap:latest
container_name: pelias-openstreetmap
volumes:
- ./pelias.json:/code/pelias.json:ro
- pelias-data:/data
depends_on:
elasticsearch:
condition: service_healthy
networks:
- pelias
profiles: ["import"]
polylines:
image: pelias/polylines:latest
container_name: pelias-polylines
volumes:
- ./pelias.json:/code/pelias.json:ro
- pelias-data:/data
depends_on:
elasticsearch:
condition: service_healthy
networks:
- pelias
profiles: ["import"]
volumes:
pelias-elasticsearch:
pelias-data:
networks:
pelias:
driver: bridge

View file

@ -1,123 +0,0 @@
const _ = require('lodash');
const field = require('./fieldValue');
// Properties to be copied
// If a property is identified as a single string, assume it should be presented as a string in response
// If something other than string is desired, use the following structure: { name: 'category', type: 'array' }
//
// Each entry is read by collectProperties():
//   - `name`      property copied from the source document when the source has it
//   - `type`      'string' / 'array' are normalised through the fieldValue helpers;
//                 any other value (here: 'default') is copied exactly as is
//   - `condition` optional predicate (params, source); the property is skipped when it returns false
const DETAILS_PROPS = [
{ name: 'unit', type: 'string' },
{ name: 'housenumber', type: 'string' },
{ name: 'street', type: 'string' },
{ name: 'postalcode', type: 'string' },
{ name: 'postalcode_gid', type: 'string' },
{ name: 'confidence', type: 'default' },
{ name: 'match_type', type: 'string' },
{ name: 'distance', type: 'default' },
{ name: 'accuracy', type: 'string' },
{ name: 'country', type: 'string' },
{ name: 'country_gid', type: 'string' },
{ name: 'country_a', type: 'string' },
{ name: 'dependency', type: 'string' },
{ name: 'dependency_gid', type: 'string' },
{ name: 'dependency_a', type: 'string' },
{ name: 'macroregion', type: 'string' },
{ name: 'macroregion_gid', type: 'string' },
{ name: 'macroregion_a', type: 'string' },
{ name: 'region', type: 'string' },
{ name: 'region_gid', type: 'string' },
{ name: 'region_a', type: 'string' },
{ name: 'macrocounty', type: 'string' },
{ name: 'macrocounty_gid', type: 'string' },
{ name: 'macrocounty_a', type: 'string' },
{ name: 'county', type: 'string' },
{ name: 'county_gid', type: 'string' },
{ name: 'county_a', type: 'string' },
{ name: 'localadmin', type: 'string' },
{ name: 'localadmin_gid', type: 'string' },
{ name: 'localadmin_a', type: 'string' },
{ name: 'locality', type: 'string' },
{ name: 'locality_gid', type: 'string' },
{ name: 'locality_a', type: 'string' },
{ name: 'borough', type: 'string' },
{ name: 'borough_gid', type: 'string' },
{ name: 'borough_a', type: 'string' },
{ name: 'neighbourhood', type: 'string' },
{ name: 'neighbourhood_gid', type: 'string' },
{ name: 'continent', type: 'string' },
{ name: 'continent_gid', type: 'string' },
{ name: 'continent_a', type: 'string' },
// empire_* is only emitted when the source has no own `country_gid`
// (the condition is the negation of hasCountry).
{ name: 'empire', type: 'string', condition: _.negate(hasCountry) },
{ name: 'empire_gid', type: 'string', condition: _.negate(hasCountry) },
{ name: 'empire_a', type: 'string', condition: _.negate(hasCountry) },
{ name: 'ocean', type: 'string' },
{ name: 'ocean_gid', type: 'string' },
{ name: 'ocean_a', type: 'string' },
{ name: 'marinearea', type: 'string' },
{ name: 'marinearea_gid', type: 'string' },
{ name: 'marinearea_a', type: 'string' },
{ name: 'bounding_box', type: 'default' },
{ name: 'label', type: 'string' },
// The condition here always passes, so `category` is copied for every
// source that has it — not only when the request carries a `categories`
// param (which is what checkCategoryParam tests for).
{ name: 'category', type: 'array', condition: () => true },
];
// Property list used when `params.enableDebug === true`: every entry of
// DETAILS_PROPS followed by `population` and `popularity`, both copied as is.
const EXTENDED_PROPS = [
  ...DETAILS_PROPS,
  { name: 'population', type: 'default' },
  { name: 'popularity', type: 'default' },
];
/**
 * Returns true IFF `source` has its own `country_gid` property.
 *
 * The check goes through `Object.prototype.hasOwnProperty.call` so it also
 * works for prototype-less objects (`Object.create(null)`) and for sources
 * that happen to carry a field named `hasOwnProperty`.
 *
 * @param {object} params clean query params (unused; kept so the function
 *   matches the `condition(params, source)` signature)
 * @param {object} source document being inspected
 * @returns {boolean}
 */
function hasCountry(params, source) {
  return Object.prototype.hasOwnProperty.call(source, 'country_gid');
}
/**
 * Returns true when `params` is an object with its own `categories` property.
 *
 * In this file the `category` entry of DETAILS_PROPS uses `() => true` as its
 * condition, so this predicate is not referenced by the property table.
 *
 * The own-property check goes through `Object.prototype.hasOwnProperty.call`
 * so it cannot be broken by a params object without a prototype or one that
 * carries a key named `hasOwnProperty`.
 *
 * @param {object} params clean query params
 * @returns {boolean}
 */
function checkCategoryParam(params) {
  return _.isObject(params) && Object.prototype.hasOwnProperty.call(params, 'categories');
}
/**
 * Collect the specified properties from source into a new object and return it.
 * Missing properties are ignored, as are properties whose `condition`
 * predicate returns false.
 *
 * @param {object} params clean query params; `enableDebug === true` switches
 *   the property list from DETAILS_PROPS to EXTENDED_PROPS
 * @param {object} source document to read the properties from
 * @returns {object} a fresh object holding the collected properties
 */
function collectProperties(params, source) {
  // extended properties when debugging mode is enabled
  const props = params.enableDebug === true ? EXTENDED_PROPS : DETAILS_PROPS;

  return props.reduce((result, prop) => {
    // if condition isn't met, don't set the property
    if (_.isFunction(prop.condition) && !prop.condition(params, source)) {
      return result;
    }

    // Own-property check via the prototype method, so a source without a
    // prototype or with a field named `hasOwnProperty` is handled correctly.
    if (!Object.prototype.hasOwnProperty.call(source, prop.name)) {
      return result;
    }

    let value;
    switch (prop.type) {
      case 'string':
        value = field.getStringValue(source[prop.name]);
        break;
      case 'array':
        value = field.getArrayValue(source[prop.name]);
        break;
      // default behavior is to copy property exactly as is
      default:
        value = source[prop.name];
    }

    // Numbers are always kept; anything else must be truthy and must not be
    // considered empty by `_.isEmpty`.
    if (_.isNumber(value) || (value && !_.isEmpty(value))) {
      result[prop.name] = value;
    }

    return result;
  }, {});
}
module.exports = collectProperties;

View file

@ -1,52 +0,0 @@
{
"esclient": {
"apiVersion": "7.x",
"hosts": [
{
"host": "elasticsearch",
"port": 9200
}
]
},
"api": {
"services": {
"libpostal": {
"url": "http://libpostal:4400"
}
},
"defaultParameters": {}
},
"imports": {
"adminLookup": {
"enabled": false
},
"openstreetmap": {
"download": [
{
"sourceURL": "https://download.geofabrik.de/europe/dach-latest.osm.pbf"
}
],
"datapath": "/data/openstreetmap",
"leveldbpath": "/data/leveldb",
"importVenues": true,
"importAddresses": true,
"adminLookup": false,
"useAdminHierarchyLabels": false,
"leveldb": {
"cacheSize": 256
},
"import": [
{
"filename": "planet-latest.osm.pbf"
}
]
},
"polylines": {
"datapath": "/data/polylines",
"files": ["extract.0sv"]
}
},
"logger": {
"level": "info"
}
}

View file

@ -1,35 +0,0 @@
#!/bin/bash
# Initial Pelias data import for DACH region.
#
# Run this ONCE after first docker compose up.
# Takes 30-60 minutes depending on hardware.
#
# After import, the "import" profile containers can be stopped.
#
# Steps, in order: create the Elasticsearch index, download the OSM extract,
# import it, then download and import the polylines street data. Every step
# runs a one-off container from the compose "import" profile (`run --rm`).
# -e: stop at the first failing command, -u: treat unset variables as errors,
# pipefail: a pipeline fails if any stage fails.
set -euo pipefail
# Work from the directory this script lives in — presumably so that
# `docker compose` picks up the compose file stored next to it.
cd "$(dirname "$0")"
echo "=== Step 1: Create Elasticsearch schema ==="
docker compose --profile import run --rm schema ./bin/create_index
echo "=== Step 2: Download DACH OSM data ==="
# NOTE(review): the compose file mounts the named volume `pelias-data` at
# /data, so this host directory does not appear to be used by the
# containers — confirm before relying on it.
mkdir -p data/openstreetmap
docker compose --profile import run --rm openstreetmap ./bin/download
echo "=== Step 3: Import OpenStreetMap data ==="
docker compose --profile import run --rm openstreetmap ./bin/start
echo "=== Step 4: Import polylines (street data) ==="
docker compose --profile import run --rm polylines ./bin/download
docker compose --profile import run --rm polylines ./bin/start
# Everything below only prints follow-up hints; it runs no further commands.
echo ""
echo "=== Import complete! ==="
echo "Pelias API is available at http://localhost:4000/v1"
echo ""
echo "Test it:"
echo " curl 'http://localhost:4000/v1/search?text=Münsterplatz+Konstanz'"
echo " curl 'http://localhost:4000/v1/reverse?point.lat=47.663&point.lon=9.175'"
echo ""
echo "You can now stop the import containers:"
echo " docker compose --profile import stop"

View file

@ -61,7 +61,7 @@ urlenc() {
echo "--- Health ---"
check "wrapper health" "$BASE/health" '.status' 'ok'
check "pelias health proxy" "$BASE/health/pelias" '.status' 'ok'
check "photon-self health proxy" "$BASE/health/photon-self" '.status' 'ok'
echo
# --- 2. Forward geocoding ---

View file

@ -14,7 +14,6 @@ import type { Config } from '../config';
function baseConfig(overrides: Partial<Config> = {}): Config {
return {
port: 3018,
pelias: { apiUrl: 'http://127.0.0.1:1' },
photon: { apiUrl: 'https://photon.komoot.io' },
photonSelf: { apiUrl: undefined },
nominatim: {
@ -25,7 +24,7 @@ function baseConfig(overrides: Partial<Config> = {}): Config {
cors: { origins: [] },
cache: { maxEntries: 100, ttlMs: 1000, publicTtlMs: 7000 },
providers: {
enabled: ['photon-self', 'pelias', 'photon', 'nominatim'],
enabled: ['photon-self', 'photon', 'nominatim'],
healthCacheMs: 30_000,
timeoutMs: 8000,
},

View file

@ -9,7 +9,6 @@ import type { Config } from './config';
import { RateLimiter } from './lib/rate-limiter';
import { ProviderChain } from './providers/chain';
import { NominatimProvider } from './providers/nominatim';
import { PeliasProvider } from './providers/pelias';
import { PhotonProvider } from './providers/photon';
import type { GeocodingProvider, ProviderName } from './providers/types';
import { createGeocodeRoutes } from './routes/geocode';
@ -47,18 +46,10 @@ export function createApp(config: Config): Hono {
export function createChain(config: Config): ProviderChain {
const built = new Map<ProviderName, GeocodingProvider>();
built.set(
'pelias',
new PeliasProvider({
apiUrl: config.pelias.apiUrl,
timeoutMs: config.providers.timeoutMs,
})
);
// Self-hosted Photon (mana-gpu). Only registered when the env-var is set
// — pre-migration this stays absent and the chain falls through to
// public providers as before. Once the GPU server is running Photon,
// flip PHOTON_SELF_API_URL on and this becomes the primary provider.
// — without it the chain runs on public providers only. Once the GPU
// server is running Photon, flip PHOTON_SELF_API_URL on and this
// becomes the primary provider.
if (config.photonSelf.apiUrl) {
built.set(
'photon-self',

View file

@ -6,10 +6,6 @@ import type { ProviderName } from './providers/types';
export interface Config {
port: number;
pelias: {
/** Pelias API base URL (the API container, not the placeholder service) */
apiUrl: string;
};
photon: {
/** Photon base URL — public komoot endpoint by default. Used by
* the `'photon'` provider slot which always has `privacy: 'public'`. */
@ -20,7 +16,7 @@ export interface Config {
* GPU server). When set, the wrapper registers a separate
* `'photon-self'` provider with `privacy: 'local'` — eligible for
* sensitive queries. When undefined, the slot is disabled and the
* chain only has the public providers (current pre-migration state). */
* chain runs on public providers only. */
apiUrl: string | undefined;
};
nominatim: {
@ -37,12 +33,13 @@ export interface Config {
/** Max entries in the in-memory LRU cache */
maxEntries: number;
/** Default TTL in milliseconds (24h — used for results from local
* providers like Pelias, where the index can be re-imported) */
* providers like photon-self) */
ttlMs: number;
/** Extended TTL for results that came from public APIs (Photon,
* Nominatim). 7 days by default caching aggressively reduces
* the number of times we forward query content to a third party,
* which is the main privacy lever we have over public providers. */
/** TTL for results that came from public APIs (Photon, Nominatim).
* Capped at 1h so a brief blip in photon-self can't pin stale
* public-fallback answers in the cache for days. The privacy
* benefit of long TTLs (fewer outbound queries) is moot now that
* photon-self serves the bulk of traffic. */
publicTtlMs: number;
};
providers: {
@ -60,9 +57,6 @@ export interface Config {
export function loadConfig(): Config {
return {
port: parseInt(process.env.PORT || '3018', 10),
pelias: {
apiUrl: process.env.PELIAS_API_URL || 'http://localhost:4000/v1',
},
photon: {
apiUrl: process.env.PHOTON_API_URL || 'https://photon.komoot.io',
},
@ -86,17 +80,15 @@ export function loadConfig(): Config {
cache: {
maxEntries: parseInt(process.env.CACHE_MAX_ENTRIES || '5000', 10),
ttlMs: parseInt(process.env.CACHE_TTL_MS || String(24 * 60 * 60 * 1000), 10),
publicTtlMs: parseInt(process.env.CACHE_PUBLIC_TTL_MS || String(7 * 24 * 60 * 60 * 1000), 10),
publicTtlMs: parseInt(process.env.CACHE_PUBLIC_TTL_MS || String(60 * 60 * 1000), 10),
},
providers: {
// Default order (when GEOCODING_PROVIDERS is unset): try the
// self-hosted Photon first if it's been configured, then public
// providers as fallback. `photon-self` is silently dropped at
// chain-build time if `photonSelf.apiUrl` is undefined, so the
// list is the same shape regardless of migration status.
// chain-build time if `photonSelf.apiUrl` is undefined.
enabled: parseProviderList(process.env.GEOCODING_PROVIDERS, [
'photon-self',
'pelias',
'photon',
'nominatim',
]),
@ -112,7 +104,7 @@ export function loadConfig(): Config {
function parseProviderList(raw: string | undefined, fallback: ProviderName[]): ProviderName[] {
if (!raw) return fallback;
const valid: ProviderName[] = ['pelias', 'photon-self', 'photon', 'nominatim'];
const valid: ProviderName[] = ['photon-self', 'photon', 'nominatim'];
const parsed = raw
.split(',')
.map((s) => s.trim().toLowerCase())

View file

@ -1,9 +1,8 @@
/**
* mana-geocoding — Self-hosted geocoding proxy.
*
* Wraps a local Pelias instance with caching and OSM PlaceCategory
* mapping. All geocoding queries stay within our infrastructure
* no user location data leaves the network.
* mana-geocoding — geocoding proxy with provider chain (photon-self →
* public photon → public nominatim) and aggressive caching. Sensitive
* queries are blocked from public providers; all forwarded queries are
* coordinate-quantized.
*/
import { createApp } from './app';
@ -12,13 +11,17 @@ import { loadConfig } from './config';
const config = loadConfig();
console.log(`mana-geocoding starting on port ${config.port}...`);
console.log(`Pelias API: ${config.pelias.apiUrl}`);
console.log(`Providers: ${config.providers.enabled.join(', ')}`);
if (config.photonSelf.apiUrl) {
console.log(`photon-self: ${config.photonSelf.apiUrl}`);
}
export default {
port: config.port,
// Bun's default idleTimeout is 10s — too tight for Pelias cold queries
// that need to hit Elasticsearch and libpostal. 60s is generous enough
// for the worst-case while still cutting off stuck connections.
// Bun's default idleTimeout is 10s — too tight for cold cross-LAN
// queries to photon-self that hit OpenSearch on a fresh shard. 60s is
// generous enough for the worst case while still cutting off stuck
// connections.
idleTimeout: 60,
fetch: createApp(config).fetch,
};

View file

@ -1,184 +0,0 @@
/**
* Unit tests for the Pelias → PlaceCategory mapping.
*
* This is the subtle part of the service: a Pelias venue often has
* multiple categories (e.g. a restaurant is `['food','retail','nightlife']`)
* and we need to pick the most specific one. The priority list in
* category-map.ts encodes that choice, and these tests lock it in.
*/
import { describe, it, expect } from 'bun:test';
import { mapPeliasToPlaceCategory } from '../category-map';
// Spec for mapPeliasToPlaceCategory, grouped into: multi-category priority
// resolution, one-to-one category mappings, category arrays recorded from real
// venues, empty / unknown input, and the optional layer argument.
describe('mapPeliasToPlaceCategory', () => {
describe('priority-ordered multi-category resolution', () => {
it('picks food over retail for a restaurant', () => {
expect(mapPeliasToPlaceCategory(['food', 'retail', 'nightlife'])).toBe('food');
});
it('picks food over retail for a bakery', () => {
// Bakery is tagged food+retail in the Pelias OSM taxonomy
expect(mapPeliasToPlaceCategory(['food', 'retail'])).toBe('food');
});
it('picks food over nightlife for a cafe', () => {
expect(mapPeliasToPlaceCategory(['food', 'nightlife'])).toBe('food');
});
it('picks transit over professional for a car_rental', () => {
// car_rental is tagged transport+professional in Pelias
expect(mapPeliasToPlaceCategory(['transport', 'professional'])).toBe('transit');
});
it('picks transit for a bus_station (multiple transport subcategories)', () => {
expect(mapPeliasToPlaceCategory(['transport', 'transport:public', 'transport:bus'])).toBe(
'transit'
);
});
it('picks transit for a station (transport:rail)', () => {
expect(
mapPeliasToPlaceCategory([
'transport',
'transport:public',
'transport:station',
'transport:rail',
])
).toBe('transit');
});
});
// One Pelias category in, one PlaceCategory out.
describe('single-category resolution', () => {
it('maps food to food', () => {
expect(mapPeliasToPlaceCategory(['food'])).toBe('food');
});
it('maps retail to shopping', () => {
expect(mapPeliasToPlaceCategory(['retail'])).toBe('shopping');
});
it('maps transport to transit', () => {
expect(mapPeliasToPlaceCategory(['transport'])).toBe('transit');
});
it('maps education to work', () => {
expect(mapPeliasToPlaceCategory(['education'])).toBe('work');
});
it('maps professional to work', () => {
expect(mapPeliasToPlaceCategory(['professional'])).toBe('work');
});
it('maps government to work', () => {
expect(mapPeliasToPlaceCategory(['government'])).toBe('work');
});
it('maps finance to work', () => {
expect(mapPeliasToPlaceCategory(['finance'])).toBe('work');
});
it('maps entertainment to leisure', () => {
expect(mapPeliasToPlaceCategory(['entertainment'])).toBe('leisure');
});
it('maps nightlife to leisure', () => {
expect(mapPeliasToPlaceCategory(['nightlife'])).toBe('leisure');
});
it('maps recreation to leisure', () => {
expect(mapPeliasToPlaceCategory(['recreation'])).toBe('leisure');
});
it('maps health to other', () => {
expect(mapPeliasToPlaceCategory(['health'])).toBe('other');
});
it('maps religion to other', () => {
expect(mapPeliasToPlaceCategory(['religion'])).toBe('other');
});
});
describe('real-world Pelias venue categories', () => {
// These are literal category arrays observed from the Konstanz DACH
// index during the 2026-04-11 deploy verification. Locking them in
// as regression tests so future priority changes can't silently
// break address search in production.
it('Konzil Restaurant Konstanz → food', () => {
expect(mapPeliasToPlaceCategory(['food', 'retail', 'nightlife'])).toBe('food');
});
it('Stuttgart Hauptbahnhof → transit', () => {
expect(
mapPeliasToPlaceCategory([
'transport',
'transport:public',
'transport:station',
'transport:rail',
])
).toBe('transit');
});
it('Physiotherapie-Schule → work', () => {
expect(mapPeliasToPlaceCategory(['education'])).toBe('work');
});
it('MX-Park (Rennstrecke) → leisure', () => {
expect(mapPeliasToPlaceCategory(['recreation'])).toBe('leisure');
});
it('KulturKiosk → work', () => {
// KulturKiosk is tagged professional in Pelias
expect(mapPeliasToPlaceCategory(['professional'])).toBe('work');
});
it('Kölner Domshop → shopping', () => {
expect(mapPeliasToPlaceCategory(['retail'])).toBe('shopping');
});
});
// Degenerate input: nothing usable resolves to 'other', and unknown entries
// do not mask a known category that appears later in the array.
describe('empty / null / unknown categories', () => {
it('returns other for empty array', () => {
expect(mapPeliasToPlaceCategory([])).toBe('other');
});
it('returns other for undefined', () => {
expect(mapPeliasToPlaceCategory(undefined)).toBe('other');
});
it('returns other for null', () => {
expect(mapPeliasToPlaceCategory(null)).toBe('other');
});
it('returns other for unknown category strings', () => {
expect(mapPeliasToPlaceCategory(['random', 'unknown'])).toBe('other');
});
it('picks known category even if unknown ones come first', () => {
expect(mapPeliasToPlaceCategory(['unknown', 'food'])).toBe('food');
});
});
// Every layer exercised without categories below is expected to yield
// 'other'; only the last case, which also passes a category, differs.
describe('Pelias layer fallback', () => {
it('uses layer hint for venue with no categories', () => {
expect(mapPeliasToPlaceCategory(undefined, 'venue')).toBe('other');
});
it('uses layer hint for address', () => {
expect(mapPeliasToPlaceCategory(undefined, 'address')).toBe('other');
});
it('uses layer hint for street', () => {
expect(mapPeliasToPlaceCategory(undefined, 'street')).toBe('other');
});
it('uses layer hint for locality', () => {
expect(mapPeliasToPlaceCategory(undefined, 'locality')).toBe('other');
});
it('prefers categories over layer hint', () => {
// A venue with food category should be food, not other
expect(mapPeliasToPlaceCategory(['food'], 'venue')).toBe('food');
});
});
});

View file

@ -2,8 +2,6 @@
* Unit tests for the raw-OSM-tag PlaceCategory mapper.
*
* Covers the cases Photon and Nominatim emit for typical DACH queries.
* The Pelias mapper has its own tests in category-map.test.ts; this file
* tests *only* the raw-OSM-tag path used by the public-API fallbacks.
*/
import { describe, expect, it } from 'bun:test';
@ -54,7 +52,7 @@ describe('mapOsmTagToPlaceCategory', () => {
expect(mapOsmTagToPlaceCategory('aeroway', 'aerodrome')).toBe('transit');
});
it('amenity:car_rental → transit', () => {
// Matches Pelias mapper's "car_rental → transit" decision
// car_rental → transit (transport-flavored)
expect(mapOsmTagToPlaceCategory('amenity', 'car_rental')).toBe('transit');
});
});
@ -116,7 +114,7 @@ describe('mapOsmTagToPlaceCategory', () => {
describe('other (health/religion/unknown)', () => {
it('amenity:hospital → other', () => {
// Health goes to other (matches Pelias mapper)
// Health goes to other
expect(mapOsmTagToPlaceCategory('amenity', 'hospital')).toBe('other');
});
it('amenity:pharmacy → other', () => {

View file

@ -1,7 +1,7 @@
/**
* Simple in-memory LRU cache with TTL for geocoding results.
* Geocoding results rarely change, so we cache aggressively to
* reduce load on the Pelias instance.
* Geocoding results rarely change, so we cache to reduce load on
* upstream providers.
*/
interface CacheEntry<T> {
@ -37,11 +37,10 @@ export class LRUCache<T> {
/**
* Insert or update a cache entry.
*
* @param ttlOverrideMs Optional per-entry TTL. Useful when results
* from public-API providers should live longer than results from
* the (frequently-changing) local Pelias index e.g. 7 days for
* Photon/Nominatim answers, 24 hours for Pelias answers. When
* omitted, the constructor's default TTL applies.
* @param ttlOverrideMs Optional per-entry TTL. The route layer uses
* this so public-fallback answers expire faster than local-provider
* answers see `ttlFor()` in routes/geocode.ts. When omitted, the
* constructor's default TTL applies.
*/
set(key: string, value: T, ttlOverrideMs?: number): void {
// Delete first so re-insert goes to end

View file

@ -1,89 +1,10 @@
/**
* Maps Pelias categories (OSM taxonomy) to our 7 Places categories.
*
* Pelias' openstreetmap importer tags venues with categories from its
* built-in taxonomy (food, retail, transport, health, education, ).
* We collapse those into the simpler Places enum:
* The 7 Places categories used across the geocoding wrapper and clients.
*
* home · work · food · shopping · transit · leisure · other
*
* A venue can have multiple Pelias categories (e.g. a restaurant is
* tagged `['food', 'retail', 'nightlife']`). We pick the most specific
* one in priority order rather than the first a restaurant should be
* "food" even though "retail" also matches.
* Provider-specific mappers (see `osm-category-map.ts` for Photon /
* Nominatim) collapse the upstream taxonomy into this shape. `home` is
* never auto-detected — it's set manually by the user.
*/
export type PlaceCategory = 'home' | 'work' | 'food' | 'shopping' | 'transit' | 'leisure' | 'other';

/**
 * Pelias categories grouped by the PlaceCategory they collapse into.
 * Both the order of the groups and the order inside each group are
 * significant: they define the lookup priority, most specific first.
 */
const PELIAS_PRIORITY_GROUPS: Array<[PlaceCategory, string[]]> = [
	// Food is the strongest signal — a restaurant is food, not retail
	['food', ['food']],
	// Transit: specific transport sub-categories before the generic one
	[
		'transit',
		[
			'transport:public',
			'transport:air',
			'transport:sea',
			'transport:bus',
			'transport:taxi',
			'transport',
		],
	],
	// Shopping — explicit retail markers
	['shopping', ['retail']],
	// Leisure / entertainment / recreation
	['leisure', ['entertainment', 'nightlife', 'recreation']],
	// Work-ish
	['work', ['education', 'professional', 'government', 'finance']],
	// Health / religion are listed explicitly but still resolve to other
	['other', ['health', 'religion']],
];

/**
 * Flat, priority-ordered `[peliasCategory, placeCategory]` pairs: the first
 * pair whose Pelias category is present on a feature wins, so "food" beats
 * "retail" and "transport" beats "professional".
 */
const PELIAS_PRIORITY: Array<[string, PlaceCategory]> = PELIAS_PRIORITY_GROUPS.flatMap(
	([placeCat, peliasCats]) =>
		peliasCats.map((peliasCat): [string, PlaceCategory] => [peliasCat, placeCat])
);

/**
 * Derive a PlaceCategory from a Pelias feature's category array.
 *
 * @param categories The `category` array from a Pelias feature's properties.
 *   Missing, null, empty, or entirely unknown arrays resolve to `'other'`.
 * @param peliasLayer The Pelias layer (venue, address, street, …). Accepted
 *   as a fallback hint, but every layer resolves to `'other'`, so it does not
 *   influence the result.
 * @returns The PlaceCategory of the highest-priority matching Pelias
 *   category, or `'other'` when nothing matches.
 */
export function mapPeliasToPlaceCategory(
	categories?: string[] | null,
	peliasLayer?: string
): PlaceCategory {
	// The layer hint never changes the outcome; reference it so the
	// parameter is not reported as unused.
	void peliasLayer;

	if (!Array.isArray(categories) || categories.length === 0) {
		return 'other';
	}

	const present = new Set(categories);
	const winner = PELIAS_PRIORITY.find(([peliasCat]) => present.has(peliasCat));
	return winner ? winner[1] : 'other';
}

View file

@ -2,15 +2,9 @@
* Maps raw OSM `class:type` tags (Photon's `osm_key:osm_value`,
* Nominatim's `class:type`) to our 7 PlaceCategories.
*
* Pelias has a curated multi-category taxonomy (`food`, `retail`,
* `transport`, ) that we map via `category-map.ts`. Photon and Nominatim
* return raw OSM tags instead `amenity:restaurant`, `shop:supermarket`,
* `public_transport:station`, etc. so they need a different lookup.
*
* The list below is intentionally narrow: it only covers tags we actually
* see in real Photon/Nominatim responses for DACH queries. Anything else
* falls through to `other`, which matches the Pelias mapper's behavior for
* unknown categories.
* falls through to `other`.
*
* If a query returns a tag we don't handle, that's the signal to add it
* here not to try to enumerate all 1000+ OSM types.
@ -25,8 +19,8 @@ interface Tag {
/**
* Priority-ordered: first match wins. More-specific entries (with a
* `value`) come before generic key-only entries. Matches Pelias's
* "food beats retail" priority intent.
* `value`) come before generic key-only entries. Same "food beats retail"
* priority intent as the upstream taxonomies.
*/
const OSM_RULES: Array<{ match: Tag; category: PlaceCategory }> = [
// ── Food (highest priority — restaurants are food, even when also
@ -82,7 +76,7 @@ const OSM_RULES: Array<{ match: Tag; category: PlaceCategory }> = [
{ match: { key: 'amenity', value: 'embassy' }, category: 'work' },
{ match: { key: 'office' }, category: 'work' },
// ── Health / religion → other (matches Pelias mapper) ───────────
// ── Health / religion → other ───────────────────────────────────
{ match: { key: 'amenity', value: 'hospital' }, category: 'other' },
{ match: { key: 'amenity', value: 'clinic' }, category: 'other' },
{ match: { key: 'amenity', value: 'doctors' }, category: 'other' },

View file

@ -14,7 +14,7 @@
* not telling Photon "user is at THIS HOUSE". Reverse geocoding
* against the city block instead of the building is acceptable.
*
* Pelias and other LAN-local providers always get the original
* Photon-self and other LAN-local providers always get the original
* full-precision coordinates — quantization only applies on the way
* out to the public internet.
*/

View file

@ -12,7 +12,7 @@
*
* Trade-offs:
* - False positives are OK (a user searching for "Praxis Müller" who
* wanted the dance studio gets 0 results when Pelias is down — not
* wanted the dance studio gets 0 results when photon-self is down — not
* ideal, but better than a privacy leak)
* - False negatives are NOT OK (we'd rather over-block than under-block)
* - The list is intentionally narrow: only words with clear medical or

View file

@ -68,7 +68,7 @@ const SEARCH: SearchRequest = { q: 'test', limit: 5, lang: 'de' };
describe('ProviderChain — happy path', () => {
it('returns the first provider that succeeds', async () => {
const a = new FakeProvider('pelias');
const a = new FakeProvider('photon-self');
const b = new FakeProvider('photon');
const chain = new ProviderChain({
providers: [a, b],
@ -76,29 +76,29 @@ describe('ProviderChain — happy path', () => {
});
const res = await chain.search(SEARCH);
expect(res.ok).toBe(true);
expect(res.provider).toBe('pelias');
expect(res.tried).toEqual(['pelias']);
expect(res.provider).toBe('photon-self');
expect(res.tried).toEqual(['photon-self']);
expect(a.calls.search).toBe(1);
expect(b.calls.search).toBe(0);
});
it('honors the providers array order', async () => {
const photon = new FakeProvider('photon');
const pelias = new FakeProvider('pelias');
const local = new FakeProvider('photon-self');
// photon first this time
const chain = new ProviderChain({
providers: [photon, pelias],
providers: [photon, local],
healthCacheMs: 60_000,
});
const res = await chain.search(SEARCH);
expect(res.provider).toBe('photon');
expect(pelias.calls.search).toBe(0);
expect(local.calls.search).toBe(0);
});
});
describe('ProviderChain — failover', () => {
it('falls through on unreachable, returns next provider', async () => {
const a = new FakeProvider('pelias', {
const a = new FakeProvider('photon-self', {
search: async () => ({ ok: false, kind: 'unreachable', status: 503 }),
});
const b = new FakeProvider('photon');
@ -106,7 +106,7 @@ describe('ProviderChain — failover', () => {
const res = await chain.search(SEARCH);
expect(res.ok).toBe(true);
expect(res.provider).toBe('photon');
expect(res.tried).toEqual(['pelias', 'photon']);
expect(res.tried).toEqual(['photon-self', 'photon']);
});
it('falls through on rate_limited', async () => {
@ -121,20 +121,20 @@ describe('ProviderChain — failover', () => {
it('STOPS on empty results — does not consume fallback budget', async () => {
// A clean empty answer is definitive: don't burn through public APIs.
const a = new FakeProvider('pelias', {
const a = new FakeProvider('photon-self', {
search: async () => ({ ok: true, results: [] }),
});
const b = new FakeProvider('photon');
const chain = new ProviderChain({ providers: [a, b], healthCacheMs: 60_000 });
const res = await chain.search(SEARCH);
expect(res.ok).toBe(true);
expect(res.provider).toBe('pelias');
expect(res.provider).toBe('photon-self');
expect(res.results).toEqual([]);
expect(b.calls.search).toBe(0);
});
it('returns ok:false when all providers fail', async () => {
const a = new FakeProvider('pelias', {
const a = new FakeProvider('photon-self', {
search: async () => ({ ok: false, kind: 'unreachable' }),
});
const b = new FakeProvider('photon', {
@ -144,23 +144,23 @@ describe('ProviderChain — failover', () => {
const res = await chain.search(SEARCH);
expect(res.ok).toBe(false);
expect(res.results).toEqual([]);
expect(res.tried).toEqual(['pelias', 'photon']);
expect(res.tried).toEqual(['photon-self', 'photon']);
});
});
describe('ProviderChain — health cache', () => {
it('skips a provider whose health probe returned false', async () => {
const dead = new FakeProvider('pelias', { health: async () => false });
const dead = new FakeProvider('photon-self', { health: async () => false });
const alive = new FakeProvider('photon');
const chain = new ProviderChain({ providers: [dead, alive], healthCacheMs: 60_000 });
const res = await chain.search(SEARCH);
expect(res.tried).toEqual(['photon']); // pelias was skipped, not tried
expect(res.tried).toEqual(['photon']); // local was skipped, not tried
expect(dead.calls.search).toBe(0);
expect(dead.calls.health).toBe(1);
});
it('caches health for healthCacheMs — only one probe per window', async () => {
const a = new FakeProvider('pelias');
const a = new FakeProvider('photon-self');
const chain = new ProviderChain({ providers: [a], healthCacheMs: 60_000 });
await chain.search(SEARCH);
await chain.search(SEARCH);
@ -171,18 +171,19 @@ describe('ProviderChain — health cache', () => {
it('marks provider unhealthy when search fails, skipping it next time', async () => {
let failNext = true;
const flaky = new FakeProvider('pelias', {
search: async () => (failNext ? { ok: false, kind: 'unreachable' } : okResults('pelias')),
const flaky = new FakeProvider('photon-self', {
search: async () =>
failNext ? { ok: false, kind: 'unreachable' } : okResults('photon-self'),
});
const alive = new FakeProvider('photon');
const chain = new ProviderChain({ providers: [flaky, alive], healthCacheMs: 60_000 });
// First call: pelias fails → cached unhealthy → photon serves
// First call: local fails → cached unhealthy → photon serves
const r1 = await chain.search(SEARCH);
expect(r1.provider).toBe('photon');
expect(r1.tried).toEqual(['pelias', 'photon']);
expect(r1.tried).toEqual(['photon-self', 'photon']);
// Second call: pelias is in unhealthy cache, not tried at all
// Second call: local is in unhealthy cache, not tried at all
failNext = false; // would now succeed but never gets called
const r2 = await chain.search(SEARCH);
expect(r2.provider).toBe('photon');
@ -191,7 +192,7 @@ describe('ProviderChain — health cache', () => {
});
it('refreshes health after cache expires', async () => {
const dead = new FakeProvider('pelias', { health: async () => false });
const dead = new FakeProvider('photon-self', { health: async () => false });
const alive = new FakeProvider('photon');
// 1ms cache for fast test
const chain = new ProviderChain({ providers: [dead, alive], healthCacheMs: 1 });
@ -203,7 +204,7 @@ describe('ProviderChain — health cache', () => {
});
it('clearHealthCache forces re-probe', async () => {
const a = new FakeProvider('pelias');
const a = new FakeProvider('photon-self');
const chain = new ProviderChain({ providers: [a], healthCacheMs: 60_000 });
await chain.search(SEARCH);
expect(a.calls.health).toBe(1);
@ -215,19 +216,19 @@ describe('ProviderChain — health cache', () => {
describe('ProviderChain — getHealthSnapshot', () => {
it('reports per-provider health + age', async () => {
const ok = new FakeProvider('pelias');
const ok = new FakeProvider('photon-self');
const dead = new FakeProvider('photon', { health: async () => false });
const chain = new ProviderChain({ providers: [ok, dead], healthCacheMs: 60_000 });
await chain.search(SEARCH);
const snap = chain.getHealthSnapshot();
expect(snap).toHaveLength(2);
expect(snap[0]).toMatchObject({ name: 'pelias', healthy: true });
expect(snap[0]).toMatchObject({ name: 'photon-self', healthy: true });
expect(snap[1]).toMatchObject({ name: 'photon', healthy: false });
expect(snap[0].ageMs).toBeLessThan(1000);
});
it('reports Infinity age for never-probed providers', async () => {
const a = new FakeProvider('pelias');
const a = new FakeProvider('photon-self');
const chain = new ProviderChain({ providers: [a], healthCacheMs: 60_000 });
const snap = chain.getHealthSnapshot();
expect(snap[0].ageMs).toBe(Infinity);
@ -237,7 +238,7 @@ describe('ProviderChain — getHealthSnapshot', () => {
describe('ProviderChain — reverse', () => {
it('uses the same provider order for reverse', async () => {
const a = new FakeProvider('pelias', {
const a = new FakeProvider('photon-self', {
reverse: async () => ({ ok: false, kind: 'unreachable' }),
});
const b = new FakeProvider('photon', { privacy: 'public' });
@ -251,26 +252,26 @@ describe('ProviderChain — reverse', () => {
describe('ProviderChain — privacy / localOnly mode', () => {
it('skips public providers when localOnly is true', async () => {
const localPelias = new FakeProvider('pelias', { privacy: 'local' });
const localProvider = new FakeProvider('photon-self', { privacy: 'local' });
const publicPhoton = new FakeProvider('photon', { privacy: 'public' });
const publicNominatim = new FakeProvider('nominatim', { privacy: 'public' });
const chain = new ProviderChain({
providers: [localPelias, publicPhoton, publicNominatim],
providers: [localProvider, publicPhoton, publicNominatim],
healthCacheMs: 60_000,
});
const res = await chain.search(SEARCH, undefined, { localOnly: true });
expect(res.ok).toBe(true);
expect(res.provider).toBe('pelias');
expect(localPelias.calls.search).toBe(1);
expect(res.provider).toBe('photon-self');
expect(localProvider.calls.search).toBe(1);
// Public providers must not even have their search() called
expect(publicPhoton.calls.search).toBe(0);
expect(publicNominatim.calls.search).toBe(0);
});
it('falls back to the second LOCAL provider when the first local fails', async () => {
const local1 = new FakeProvider('pelias', {
const local1 = new FakeProvider('photon-self', {
privacy: 'local',
search: async () => ({ ok: false, kind: 'unreachable' }),
});
@ -313,7 +314,7 @@ describe('ProviderChain — privacy / localOnly mode', () => {
});
it('returns notice: fallback_used when a public provider serves a non-sensitive query', async () => {
const localDown = new FakeProvider('pelias', {
const localDown = new FakeProvider('photon-self', {
privacy: 'local',
health: async () => false,
});
@ -329,10 +330,10 @@ describe('ProviderChain — privacy / localOnly mode', () => {
});
it('NO notice when the local provider serves a non-sensitive query', async () => {
const localUp = new FakeProvider('pelias', { privacy: 'local' });
const localUp = new FakeProvider('photon-self', { privacy: 'local' });
const chain = new ProviderChain({ providers: [localUp], healthCacheMs: 60_000 });
const res = await chain.search(SEARCH);
expect(res.provider).toBe('pelias');
expect(res.provider).toBe('photon-self');
expect(res.notice).toBeUndefined();
});
});

View file

@ -1,7 +1,7 @@
/**
* Tests for normalizing Nominatim's flat-JSON shape into our GeocodingResult.
*
* Nominatim differs from Photon/Pelias in three subtle ways we lock in:
* Nominatim differs from Photon in three subtle ways we lock in:
* 1. Lat/lon are STRINGS, not numbers — the normalizer must parseFloat.
* 2. Display name is a comma-noisy hierarchy ("Konzil, Hafenstraße,
* Konstanz, Konstanz, Regierungsbezirk Freiburg, Baden-Württemberg,
@ -135,16 +135,4 @@ describe('normalizeNominatimResult', () => {
});
expect(result.provider).toBe('nominatim');
});
it('does not set peliasCategories', () => {
// Consumer side keys off the absence of this field as a "fallback
// provider" signal.
const result = normalizeNominatimResult({
lat: '47.0',
lon: '9.0',
class: 'amenity',
type: 'restaurant',
});
expect(result.peliasCategories).toBeUndefined();
});
});

View file

@ -44,8 +44,6 @@ describe('normalizePhotonFeature', () => {
});
expect(result.confidence).toBeCloseTo(0.78, 2);
expect(result.provider).toBe('photon');
// peliasCategories deliberately absent for non-Pelias providers
expect(result.peliasCategories).toBeUndefined();
});
it('builds label from structured fields', () => {
@ -111,7 +109,7 @@ describe('normalizePhotonFeature', () => {
});
it('coordinates: Photon emits [lon, lat] — normalizer must NOT swap', () => {
// Catches the all-too-easy lon/lat flip when porting from Pelias.
// Catches the all-too-easy lon/lat flip in Photon's GeoJSON.
const result = normalizePhotonFeature({
type: 'Feature',
geometry: { type: 'Point', coordinates: [9.1758, 47.6634] },

View file

@ -47,7 +47,7 @@ export type ChainNotice =
/** Sensitive query was blocked from public providers and no local
* provider was healthy no results, but the absence is intentional. */
| 'sensitive_local_unavailable'
/** A non-Pelias provider served the request (Pelias was down). */
/** A public provider served the request (the local provider was down). */
| 'fallback_used';
export interface ChainOptions {
@ -161,9 +161,9 @@ export class ProviderChain {
}
// Stale or missing — refresh. We don't await this aggressively in
// happy paths (Pelias up + healthy is the cheapest case), but on
// cold-start every entry is missing so the first request pays for
// one health probe per provider.
// happy paths (photon-self up + healthy is the cheapest case),
// but on cold-start every entry is missing so the first request
// pays for one health probe per provider.
const healthy = await provider.health(signal);
this.health.set(provider.name, { healthy, checkedAt: now });
if (!healthy) {

View file

@ -6,9 +6,9 @@
* search/reverse. A custom `User-Agent` is required (Nominatim returns
* 403 to default-UA fetches).
*
* Compared to Pelias/Photon, Nominatim returns a single flat array
* rather than GeoJSON. We adapt the shape and synthesize a confidence
* score from `importance`.
* Unlike Photon, Nominatim returns a single flat array rather than
* GeoJSON. We adapt the shape and synthesize a confidence score from
* `importance`.
*
* https://nominatim.org/release-docs/develop/api/Search/
* https://operations.osmfoundation.org/policies/nominatim/

View file

@ -1,178 +0,0 @@
/**
* Pelias provider — primary backend, self-hosted with the DACH OSM index.
*
* Forward-search uses /autocomplete first (fast venue match) and falls
* back to /search if autocomplete returns zero features (autocomplete
* deliberately excludes the address layer for perf).
*/
import { mapPeliasToPlaceCategory } from '../lib/category-map';
import type {
GeocodingProvider,
GeocodingResult,
ProviderResponse,
ReverseRequest,
SearchRequest,
} from './types';
/** Connection settings consumed by `PeliasProvider`. */
export interface PeliasConfig {
/** Base URL of the Pelias API. Request paths (`/autocomplete`, `/search`,
* `/reverse`, `/status`) are appended to it verbatim, so it should not
* end with a slash. */
apiUrl: string;
/** Timeout in milliseconds applied to each individual upstream request,
* including the health probe. */
timeoutMs: number;
}
/**
 * Geocoding provider backed by a Pelias HTTP API.
 *
 * Identifies itself as `'pelias'` with `privacy: 'local'`. Upstream
 * failures (non-2xx status, network error, timeout, unparsable body) are
 * reported to the caller as `{ ok: false, kind: 'unreachable' }` rather
 * than thrown.
 */
export class PeliasProvider implements GeocodingProvider {
	readonly name = 'pelias' as const;
	readonly privacy = 'local' as const;

	constructor(private readonly config: PeliasConfig) {}

	/**
	 * Forward search. Queries `/autocomplete` first and only consults
	 * `/search` when autocomplete succeeded with zero features.
	 *
	 * @param req    Query text, result limit, language and optional focus point.
	 * @param signal Optional caller-side abort signal, combined with the
	 *               per-request timeout.
	 * @returns Normalized results on success, or an `unreachable` failure.
	 */
	async search(req: SearchRequest, signal?: AbortSignal): Promise<ProviderResponse> {
		const query = new URLSearchParams({
			text: req.q.trim(),
			size: String(req.limit),
			lang: req.lang,
		});
		// The focus point is only sent when both halves are present.
		if (req.focusLat && req.focusLon) {
			query.set('focus.point.lat', req.focusLat);
			query.set('focus.point.lon', req.focusLon);
		}

		try {
			const autocomplete = await this.fetch(`/autocomplete?${query}`, signal);
			if (!autocomplete.ok) {
				return { ok: false, kind: 'unreachable', status: autocomplete.status };
			}
			if (autocomplete.features.length > 0) {
				return { ok: true, results: autocomplete.features.map(normalizePeliasFeature) };
			}

			// Autocomplete answered but found nothing: give /search one try.
			// A non-OK /search at this point is still a clean zero-results
			// answer, because the autocomplete probe already proved the
			// server is reachable.
			const fullSearch = await this.fetch(`/search?${query}`, signal);
			const features = fullSearch.ok ? fullSearch.features : autocomplete.features;
			return { ok: true, results: features.map(normalizePeliasFeature) };
		} catch (err) {
			return { ok: false, kind: 'unreachable', error: errorMessage(err) };
		}
	}

	/**
	 * Reverse geocoding via `/reverse`, always asking for three candidates.
	 *
	 * @param req    Coordinates and language.
	 * @param signal Optional caller-side abort signal.
	 * @returns Normalized results on success, or an `unreachable` failure.
	 */
	async reverse(req: ReverseRequest, signal?: AbortSignal): Promise<ProviderResponse> {
		const query = new URLSearchParams({
			'point.lat': req.lat,
			'point.lon': req.lon,
			size: '3',
			lang: req.lang,
		});

		try {
			const response = await this.fetch(`/reverse?${query}`, signal);
			if (response.ok) {
				return { ok: true, results: response.features.map(normalizePeliasFeature) };
			}
			return { ok: false, kind: 'unreachable', status: response.status };
		} catch (err) {
			return { ok: false, kind: 'unreachable', error: errorMessage(err) };
		}
	}

	/**
	 * Liveness probe against `/status`.
	 *
	 * @param signal Optional caller-side abort signal.
	 * @returns `true` for any 2xx response and for 404; `false` for every
	 *          other status and for any thrown error.
	 */
	async health(signal?: AbortSignal): Promise<boolean> {
		try {
			const statusUrl = `${this.config.apiUrl}/status`;
			const timeout = AbortSignal.timeout(this.config.timeoutMs);
			const response = await fetch(statusUrl, { signal: combineSignals(signal, timeout) });
			// /v1/status doesn't exist on every Pelias version — a 404 still
			// means the server is up. Anything else (5xx, ECONNREFUSED,
			// timeout) is unhealthy.
			if (response.status === 404) return true;
			return response.ok;
		} catch {
			return false;
		}
	}

	/**
	 * Issues one GET against the configured API and extracts the feature
	 * list. Rejects on network errors, timeouts and JSON parse failures;
	 * callers translate those into `unreachable`.
	 */
	private async fetch(
		path: string,
		signal?: AbortSignal
	): Promise<{ ok: boolean; status: number; features: PeliasFeature[] }> {
		const response = await fetch(`${this.config.apiUrl}${path}`, {
			signal: combineSignals(signal, AbortSignal.timeout(this.config.timeoutMs)),
		});
		const { status } = response;
		if (!response.ok) return { ok: false, status, features: [] };

		const body = (await response.json()) as PeliasResponse;
		// Tolerate a body without `features` by treating it as no hits.
		return { ok: true, status, features: body.features ?? [] };
	}
}
// --- Pelias native types ---
/** JSON body the provider expects from Pelias: a GeoJSON feature
* collection. Only `features` is read; the reader falls back to an empty
* list when it is missing, so the field is presumably not guaranteed on
* the wire despite being declared required here. */
interface PeliasResponse {
type: 'FeatureCollection';
features: PeliasFeature[];
}
/** One GeoJSON point feature as returned by Pelias. Every property is
* optional; `normalizePeliasFeature` supplies defaults when mapping to
* `GeocodingResult`. */
interface PeliasFeature {
type: 'Feature';
geometry: {
type: 'Point';
coordinates: [number, number]; // [lon, lat]
};
properties: {
id?: string;
/** Used as the result name, and as the label when `label` is empty. */
name?: string;
/** Preferred display label. */
label?: string;
/** Copied to the result as-is; treated as 0 when absent. */
confidence?: number;
/** Passed together with `category` to the place-category mapper. */
layer?: string;
street?: string;
housenumber?: string;
postalcode?: string;
/** Mapped to `address.city`. */
locality?: string;
/** Mapped to `address.state`. */
region?: string;
country?: string;
/** Raw Pelias category tags; also forwarded untouched as `peliasCategories`. */
category?: string[];
};
}
/**
 * Converts one Pelias GeoJSON feature into the provider-agnostic
 * `GeocodingResult` shape.
 *
 * @param feature Feature taken from a Pelias response.
 * @returns The normalized result with `provider` fixed to `'pelias'`.
 *          The label falls back to the name and then to an empty string;
 *          a missing confidence becomes 0.
 */
export function normalizePeliasFeature(feature: PeliasFeature): GeocodingResult {
	const { geometry, properties } = feature;
	// GeoJSON positions are [lon, lat]; unpack by position and assign to
	// the explicitly named fields below so the two are never swapped.
	const [longitude, latitude] = geometry.coordinates;
	const displayName = properties.name || '';

	return {
		label: properties.label || displayName,
		name: displayName,
		latitude,
		longitude,
		address: {
			street: properties.street,
			houseNumber: properties.housenumber,
			postalCode: properties.postalcode,
			city: properties.locality,
			state: properties.region,
			country: properties.country,
		},
		category: mapPeliasToPlaceCategory(properties.category, properties.layer),
		peliasCategories: properties.category,
		confidence: properties.confidence ?? 0,
		provider: 'pelias',
	};
}
/**
 * Produces a human-readable message from an arbitrary thrown value.
 *
 * @param e Whatever was caught.
 * @returns `e.message` for `Error` instances, otherwise `String(e)`.
 */
function errorMessage(e: unknown): string {
	if (e instanceof Error) {
		return e.message;
	}
	return String(e);
}
/**
 * Combine an external AbortSignal with our own timeout signal.
 * `AbortSignal.any` exists in Bun but TS typing is patchy across
 * runtimes — hence this small helper.
 *
 * `undefined` entries are ignored. With exactly one real signal that
 * signal is returned as-is. Otherwise a fresh signal is returned that
 * aborts, with the same reason, as soon as any input aborts. If an input
 * is already aborted the result is aborted immediately.
 *
 * Listeners are detached from every input once the combined signal has
 * fired, and none are attached when an input is already aborted, so a
 * long-lived caller signal does not accumulate dead listeners across
 * requests.
 *
 * @param signals Signals to merge; `undefined` entries are skipped.
 * @returns A signal that reflects the first abort among the inputs.
 */
function combineSignals(...signals: Array<AbortSignal | undefined>): AbortSignal {
	const real = signals.filter((s): s is AbortSignal => !!s);
	if (real.length === 1) return real[0];

	const ctrl = new AbortController();

	// Check for an already-aborted input before wiring anything up, so we
	// never leave listeners behind on the signals that precede it.
	const alreadyAborted = real.find((s) => s.aborted);
	if (alreadyAborted) {
		ctrl.abort(alreadyAborted.reason);
		return ctrl.signal;
	}

	const detachers: Array<() => void> = [];
	for (const s of real) {
		const onAbort = (): void => {
			// First abort wins: unhook from every input, then propagate.
			for (const detach of detachers) detach();
			ctrl.abort(s.reason);
		};
		s.addEventListener('abort', onAbort, { once: true });
		detachers.push(() => s.removeEventListener('abort', onAbort));
	}
	return ctrl.signal;
}

View file

@ -5,15 +5,10 @@
* importer). The HTTP shape is GeoJSON FeatureCollection with `properties`
* holding `osm_key`/`osm_value` raw OSM tags + structured address fields.
*
* Compared to Pelias:
* + No rate limit advertised, but be a polite neighbor: short timeouts,
* no retries, cache aggressively.
* + Reverse geocoding takes lon/lat (note the order — different from
* Pelias's point.lat/point.lon). Easy to flip if not careful.
* - No `confidence` field. We approximate from `importance` (0–1) when
* present, else 0.5 as a neutral default.
* - No DACH-specific tuning — German venue names sometimes lose umlauts
* in display labels. Acceptable for a fallback.
* Same class powers both `photon-self` (self-hosted, privacy: 'local')
* and `photon` (public komoot.io, privacy: 'public'). Reverse-geocoding
* takes lon/lat (note the order). Confidence is approximated from
* `importance` (0–1) when present, else 0.5 as a neutral default.
*/
import { mapOsmTagToPlaceCategory } from '../lib/osm-category-map';
@ -207,9 +202,6 @@ export function normalizePhotonFeature(
country: props.country,
},
category,
// peliasCategories deliberately omitted — Photon has osm_key:osm_value
// but the consumer side keys off the absence of this field as a
// "result came from a fallback" signal.
confidence: typeof props.importance === 'number' ? props.importance : 0.5,
provider: providerName,
};

View file

@ -29,12 +29,8 @@ export interface GeocodingResult {
};
/** Our Places category, derived from the provider's native taxonomy. */
category: PlaceCategory;
/** Raw Pelias categories (food, retail, transport, …) — only present
* when the result came from Pelias. Photon/Nominatim don't have an
* equivalent multi-tag taxonomy. */
peliasCategories?: string[];
/** Confidence score 0–1. Pelias provides this natively; Photon/Nominatim
* approximate it from `importance`. */
/** Confidence score 0–1. Photon/Nominatim approximate it from
* `importance`. */
confidence: number;
/** Which provider answered — useful for telemetry + UI hints
* ("approximate match" badge for fallback providers). */
@ -42,8 +38,8 @@ export interface GeocodingResult {
}
/**
* Provider identifiers. Two of these wrap the same `PhotonProvider`
* class with different configs:
* Provider identifiers. `photon-self` and `photon` both wrap the same
* `PhotonProvider` class with different configs:
*
* - `photon-self`: self-hosted Photon (typically on mana-gpu),
* `privacy: 'local'`. Eligible for sensitive queries.
@ -55,7 +51,7 @@ export interface GeocodingResult {
* tracks per-provider health. A single `photon` slot can't simultaneously
* mean two different backends.
*/
export type ProviderName = 'pelias' | 'photon-self' | 'photon' | 'nominatim';
export type ProviderName = 'photon-self' | 'photon' | 'nominatim';
export interface SearchRequest {
q: string;

View file

@ -21,10 +21,10 @@ interface CachedAnswer {
}
/**
* TTL chooser. Public-API results (Photon/Nominatim) get the longer TTL —
* caching aggressively is the main privacy lever once the query has
* already left our network. Local results stay on the shorter TTL because
* the Pelias index can be re-imported; we don't want stale local data.
* TTL chooser. Public-API results (photon/nominatim) get a shorter TTL
* (1h) so a transient blip in photon-self doesn't pin stale fallback
* answers in the cache for days. Local results (photon-self) get the
* longer 24h TTL.
*
* Sensitive-query notices are cached on the short TTL too (the user might
* retry from a different angle quickly), and `undefined` provider (chain

View file

@ -9,35 +9,43 @@ export function createHealthRoutes(config: Config, chain: ProviderChain) {
app.get('/', (c) => c.json({ status: 'ok', service: 'mana-geocoding' }));
/**
* Upstream Pelias health. Proxies a request to the Pelias API so
* monitoring can reach it without `extra_hosts: host.docker.internal`
* on the blackbox exporter.
* Upstream photon-self health. Proxies a request to the self-hosted
* Photon so monitoring can reach it without `extra_hosts:
* host.docker.internal` on the blackbox exporter.
*
* Backwards-compatible: existing prometheus probes against this
* endpoint keep working. Now reports `degraded` (200) instead of `down`
* (503) when Pelias is unreachable but a fallback provider is healthy —
* the system can still serve queries, just slower / less precise.
* Reports `degraded` (200) instead of `down` (503) when photon-self is
* unreachable but a public fallback (photon / nominatim) is healthy —
* the system can still serve queries, just at the cost of leaking the
* query content to a third party.
*/
app.get('/pelias', async (c) => {
app.get('/photon-self', async (c) => {
const upstream = config.photonSelf.apiUrl;
if (!upstream) {
return c.json({ status: 'unconfigured', error: 'PHOTON_SELF_API_URL is unset' }, 503);
}
try {
const res = await fetch(`${config.pelias.apiUrl}/status`, {
const res = await fetch(`${upstream}/api?q=Konstanz&limit=1`, {
signal: AbortSignal.timeout(5000),
});
if (!res.ok && res.status !== 404) {
if (!res.ok) {
return c.json(
{ status: 'degraded', upstream: res.status, fallbackAvailable: chainHasFallback(chain) },
chainHasFallback(chain) ? 200 : 503
{
status: 'degraded',
upstream: res.status,
fallbackAvailable: chainHasPublicFallback(chain),
},
chainHasPublicFallback(chain) ? 200 : 503
);
}
return c.json({ status: 'ok', upstream: 'pelias-api' });
return c.json({ status: 'ok', upstream: 'photon-self' });
} catch (e) {
return c.json(
{
status: chainHasFallback(chain) ? 'degraded' : 'down',
status: chainHasPublicFallback(chain) ? 'degraded' : 'down',
error: e instanceof Error ? e.message : 'unknown',
fallbackAvailable: chainHasFallback(chain),
fallbackAvailable: chainHasPublicFallback(chain),
},
chainHasFallback(chain) ? 200 : 503
chainHasPublicFallback(chain) ? 200 : 503
);
}
});
@ -56,10 +64,10 @@ export function createHealthRoutes(config: Config, chain: ProviderChain) {
}
/**
* Check if any non-Pelias provider is currently believed healthy. Used
* to soften /pelias health to "degraded" instead of "down" when a
* fallback can still serve traffic.
* Check if any public fallback provider is currently believed healthy.
* Used to soften /photon-self health to "degraded" instead of "down"
* when a public fallback can still serve traffic.
*/
function chainHasFallback(chain: ProviderChain): boolean {
return chain.getHealthSnapshot().some((p) => p.name !== 'pelias' && p.healthy);
function chainHasPublicFallback(chain: ProviderChain): boolean {
return chain.getHealthSnapshot().some((p) => p.name !== 'photon-self' && p.healthy);
}