chore(geocoding): remove Pelias + close 3 bypass paths to public Nominatim

Pelias was retired from the Mac mini on 2026-04-28; photon-self
(self-hosted Photon on mana-gpu) has been the live primary since then.
This removes the now-dead Pelias adapter, config, tests, and the
services/mana-geocoding/pelias/ stack — the entire compose file, the
geojsonify_place_details.js patch, the setup.sh import script.

Provider chain is now `photon-self → photon → nominatim`. The chain
keeps its `privacy: 'local' | 'public'` split, sensitive-query
blocking, coord quantization, and aggressive caching unchanged.

Three direct calls to nominatim.openstreetmap.org that bypassed
mana-geocoding now route through the wrapper:

- citycorners/add-city + citycorners/cities/[slug]/add use the shared
  searchAddress() client (browser → same-origin proxy → mana-geocoding
  → photon-self).
- memoro mobile drops its OSM reverse-geocoding fallback entirely;
  Expo's on-device reverse-geocoding stays as the sole path. Routing
  through the wrapper would require a memoro-server proxy endpoint —
  a follow-up if Expo's quality proves insufficient.

Other behavioral changes:

- CACHE_PUBLIC_TTL_MS dropped from 7d → 1h. The long TTL was a
  privacy-amplification trick from the Pelias era; with photon-self
  serving the bulk of traffic, a transient cross-LAN blip was pinning
  cached fallback answers for days. 1h gives quick recovery.
- /health/pelias renamed to /health/photon-self; prometheus blackbox
  config + status-page generator updated.
- mana-geocoding container no longer needs `extra_hosts:
  host.docker.internal:host-gateway` (was only there for the
  Pelias-on-host-network era).

113 tests passing. CLAUDE.md rewritten to reflect the post-Pelias
architecture.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Till JS 2026-04-28 22:12:26 +02:00
parent 7bca16dfa7
commit 2bbcf14aba
35 changed files with 330 additions and 1262 deletions

View file

@ -14,7 +14,6 @@ import type { Config } from '../config';
function baseConfig(overrides: Partial<Config> = {}): Config {
return {
port: 3018,
pelias: { apiUrl: 'http://127.0.0.1:1' },
photon: { apiUrl: 'https://photon.komoot.io' },
photonSelf: { apiUrl: undefined },
nominatim: {
@ -25,7 +24,7 @@ function baseConfig(overrides: Partial<Config> = {}): Config {
cors: { origins: [] },
cache: { maxEntries: 100, ttlMs: 1000, publicTtlMs: 7000 },
providers: {
enabled: ['photon-self', 'pelias', 'photon', 'nominatim'],
enabled: ['photon-self', 'photon', 'nominatim'],
healthCacheMs: 30_000,
timeoutMs: 8000,
},

View file

@ -9,7 +9,6 @@ import type { Config } from './config';
import { RateLimiter } from './lib/rate-limiter';
import { ProviderChain } from './providers/chain';
import { NominatimProvider } from './providers/nominatim';
import { PeliasProvider } from './providers/pelias';
import { PhotonProvider } from './providers/photon';
import type { GeocodingProvider, ProviderName } from './providers/types';
import { createGeocodeRoutes } from './routes/geocode';
@ -47,18 +46,10 @@ export function createApp(config: Config): Hono {
export function createChain(config: Config): ProviderChain {
const built = new Map<ProviderName, GeocodingProvider>();
built.set(
'pelias',
new PeliasProvider({
apiUrl: config.pelias.apiUrl,
timeoutMs: config.providers.timeoutMs,
})
);
// Self-hosted Photon (mana-gpu). Only registered when the env-var is set
// — pre-migration this stays absent and the chain falls through to
// public providers as before. Once the GPU server is running Photon,
// flip PHOTON_SELF_API_URL on and this becomes the primary provider.
// — without it the chain runs on public providers only. Once the GPU
// server is running Photon, flip PHOTON_SELF_API_URL on and this
// becomes the primary provider.
if (config.photonSelf.apiUrl) {
built.set(
'photon-self',

View file

@ -6,10 +6,6 @@ import type { ProviderName } from './providers/types';
export interface Config {
port: number;
pelias: {
/** Pelias API base URL (the API container, not the placeholder service) */
apiUrl: string;
};
photon: {
/** Photon base URL — public komoot endpoint by default. Used by
* the `'photon'` provider slot which always has `privacy: 'public'`. */
@ -20,7 +16,7 @@ export interface Config {
* GPU server). When set, the wrapper registers a separate
* `'photon-self'` provider with `privacy: 'local'` — eligible for
* sensitive queries. When undefined, the slot is disabled and the
* chain only has the public providers (current pre-migration state). */
* chain runs on public providers only. */
apiUrl: string | undefined;
};
nominatim: {
@ -37,12 +33,13 @@ export interface Config {
/** Max entries in the in-memory LRU cache */
maxEntries: number;
/** Default TTL in milliseconds (24h — used for results from local
* providers like Pelias, where the index can be re-imported) */
* providers like photon-self) */
ttlMs: number;
/** Extended TTL for results that came from public APIs (Photon,
* Nominatim). 7 days by default — caching aggressively reduces
* the number of times we forward query content to a third party,
* which is the main privacy lever we have over public providers. */
/** TTL for results that came from public APIs (Photon, Nominatim).
* Capped at 1h so a brief blip in photon-self can't pin stale
* public-fallback answers in the cache for days. The privacy
* benefit of long TTLs (fewer outbound queries) is moot now that
* photon-self serves the bulk of traffic. */
publicTtlMs: number;
};
providers: {
@ -60,9 +57,6 @@ export interface Config {
export function loadConfig(): Config {
return {
port: parseInt(process.env.PORT || '3018', 10),
pelias: {
apiUrl: process.env.PELIAS_API_URL || 'http://localhost:4000/v1',
},
photon: {
apiUrl: process.env.PHOTON_API_URL || 'https://photon.komoot.io',
},
@ -86,17 +80,15 @@ export function loadConfig(): Config {
cache: {
maxEntries: parseInt(process.env.CACHE_MAX_ENTRIES || '5000', 10),
ttlMs: parseInt(process.env.CACHE_TTL_MS || String(24 * 60 * 60 * 1000), 10),
publicTtlMs: parseInt(process.env.CACHE_PUBLIC_TTL_MS || String(7 * 24 * 60 * 60 * 1000), 10),
publicTtlMs: parseInt(process.env.CACHE_PUBLIC_TTL_MS || String(60 * 60 * 1000), 10),
},
providers: {
// Default order (when GEOCODING_PROVIDERS is unset): try the
// self-hosted Photon first if it's been configured, then public
// providers as fallback. `photon-self` is silently dropped at
// chain-build time if `photonSelf.apiUrl` is undefined, so the
// list is the same shape regardless of migration status.
// chain-build time if `photonSelf.apiUrl` is undefined.
enabled: parseProviderList(process.env.GEOCODING_PROVIDERS, [
'photon-self',
'pelias',
'photon',
'nominatim',
]),
@ -112,7 +104,7 @@ export function loadConfig(): Config {
function parseProviderList(raw: string | undefined, fallback: ProviderName[]): ProviderName[] {
if (!raw) return fallback;
const valid: ProviderName[] = ['pelias', 'photon-self', 'photon', 'nominatim'];
const valid: ProviderName[] = ['photon-self', 'photon', 'nominatim'];
const parsed = raw
.split(',')
.map((s) => s.trim().toLowerCase())

View file

@ -1,9 +1,8 @@
/**
* mana-geocoding — Self-hosted geocoding proxy.
*
* Wraps a local Pelias instance with caching and OSM → PlaceCategory
* mapping. All geocoding queries stay within our infrastructure —
* no user location data leaves the network.
* mana-geocoding — geocoding proxy with provider chain (photon-self →
* public photon → public nominatim) and aggressive caching. Sensitive
* queries are blocked from public providers; all forwarded queries are
* coordinate-quantized.
*/
import { createApp } from './app';
@ -12,13 +11,17 @@ import { loadConfig } from './config';
const config = loadConfig();
console.log(`mana-geocoding starting on port ${config.port}...`);
console.log(`Pelias API: ${config.pelias.apiUrl}`);
console.log(`Providers: ${config.providers.enabled.join(', ')}`);
if (config.photonSelf.apiUrl) {
console.log(`photon-self: ${config.photonSelf.apiUrl}`);
}
export default {
port: config.port,
// Bun's default idleTimeout is 10s — too tight for Pelias cold queries
// that need to hit Elasticsearch and libpostal. 60s is generous enough
// for the worst-case while still cutting off stuck connections.
// Bun's default idleTimeout is 10s — too tight for cold cross-LAN
// queries to photon-self that hit OpenSearch on a fresh shard. 60s is
// generous enough for the worst case while still cutting off stuck
// connections.
idleTimeout: 60,
fetch: createApp(config).fetch,
};

View file

@ -1,184 +0,0 @@
/**
* Unit tests for the Pelias → PlaceCategory mapping.
*
* This is the subtle part of the service: a Pelias venue often has
* multiple categories (e.g. a restaurant is `['food','retail','nightlife']`)
* and we need to pick the most specific one. The priority list in
* category-map.ts encodes that choice, and these tests lock it in.
*/
import { describe, it, expect } from 'bun:test';
import { mapPeliasToPlaceCategory } from '../category-map';
describe('mapPeliasToPlaceCategory', () => {
describe('priority-ordered multi-category resolution', () => {
it('picks food over retail for a restaurant', () => {
expect(mapPeliasToPlaceCategory(['food', 'retail', 'nightlife'])).toBe('food');
});
it('picks food over retail for a bakery', () => {
// Bakery is tagged food+retail in the Pelias OSM taxonomy
expect(mapPeliasToPlaceCategory(['food', 'retail'])).toBe('food');
});
it('picks food over nightlife for a cafe', () => {
expect(mapPeliasToPlaceCategory(['food', 'nightlife'])).toBe('food');
});
it('picks transit over professional for a car_rental', () => {
// car_rental is tagged transport+professional in Pelias
expect(mapPeliasToPlaceCategory(['transport', 'professional'])).toBe('transit');
});
it('picks transit for a bus_station (multiple transport subcategories)', () => {
expect(mapPeliasToPlaceCategory(['transport', 'transport:public', 'transport:bus'])).toBe(
'transit'
);
});
it('picks transit for a station (transport:rail)', () => {
expect(
mapPeliasToPlaceCategory([
'transport',
'transport:public',
'transport:station',
'transport:rail',
])
).toBe('transit');
});
});
describe('single-category resolution', () => {
it('maps food to food', () => {
expect(mapPeliasToPlaceCategory(['food'])).toBe('food');
});
it('maps retail to shopping', () => {
expect(mapPeliasToPlaceCategory(['retail'])).toBe('shopping');
});
it('maps transport to transit', () => {
expect(mapPeliasToPlaceCategory(['transport'])).toBe('transit');
});
it('maps education to work', () => {
expect(mapPeliasToPlaceCategory(['education'])).toBe('work');
});
it('maps professional to work', () => {
expect(mapPeliasToPlaceCategory(['professional'])).toBe('work');
});
it('maps government to work', () => {
expect(mapPeliasToPlaceCategory(['government'])).toBe('work');
});
it('maps finance to work', () => {
expect(mapPeliasToPlaceCategory(['finance'])).toBe('work');
});
it('maps entertainment to leisure', () => {
expect(mapPeliasToPlaceCategory(['entertainment'])).toBe('leisure');
});
it('maps nightlife to leisure', () => {
expect(mapPeliasToPlaceCategory(['nightlife'])).toBe('leisure');
});
it('maps recreation to leisure', () => {
expect(mapPeliasToPlaceCategory(['recreation'])).toBe('leisure');
});
it('maps health to other', () => {
expect(mapPeliasToPlaceCategory(['health'])).toBe('other');
});
it('maps religion to other', () => {
expect(mapPeliasToPlaceCategory(['religion'])).toBe('other');
});
});
describe('real-world Pelias venue categories', () => {
// These are literal category arrays observed from the Konstanz DACH
// index during the 2026-04-11 deploy verification. Locking them in
// as regression tests so future priority changes can't silently
// break address search in production.
it('Konzil Restaurant Konstanz → food', () => {
expect(mapPeliasToPlaceCategory(['food', 'retail', 'nightlife'])).toBe('food');
});
it('Stuttgart Hauptbahnhof → transit', () => {
expect(
mapPeliasToPlaceCategory([
'transport',
'transport:public',
'transport:station',
'transport:rail',
])
).toBe('transit');
});
it('Physiotherapie-Schule → work', () => {
expect(mapPeliasToPlaceCategory(['education'])).toBe('work');
});
it('MX-Park (Rennstrecke) → leisure', () => {
expect(mapPeliasToPlaceCategory(['recreation'])).toBe('leisure');
});
it('KulturKiosk → work', () => {
// KulturKiosk is tagged professional in Pelias
expect(mapPeliasToPlaceCategory(['professional'])).toBe('work');
});
it('Kölner Domshop → shopping', () => {
expect(mapPeliasToPlaceCategory(['retail'])).toBe('shopping');
});
});
describe('empty / null / unknown categories', () => {
it('returns other for empty array', () => {
expect(mapPeliasToPlaceCategory([])).toBe('other');
});
it('returns other for undefined', () => {
expect(mapPeliasToPlaceCategory(undefined)).toBe('other');
});
it('returns other for null', () => {
expect(mapPeliasToPlaceCategory(null)).toBe('other');
});
it('returns other for unknown category strings', () => {
expect(mapPeliasToPlaceCategory(['random', 'unknown'])).toBe('other');
});
it('picks known category even if unknown ones come first', () => {
expect(mapPeliasToPlaceCategory(['unknown', 'food'])).toBe('food');
});
});
describe('Pelias layer fallback', () => {
it('uses layer hint for venue with no categories', () => {
expect(mapPeliasToPlaceCategory(undefined, 'venue')).toBe('other');
});
it('uses layer hint for address', () => {
expect(mapPeliasToPlaceCategory(undefined, 'address')).toBe('other');
});
it('uses layer hint for street', () => {
expect(mapPeliasToPlaceCategory(undefined, 'street')).toBe('other');
});
it('uses layer hint for locality', () => {
expect(mapPeliasToPlaceCategory(undefined, 'locality')).toBe('other');
});
it('prefers categories over layer hint', () => {
// A venue with food category should be food, not other
expect(mapPeliasToPlaceCategory(['food'], 'venue')).toBe('food');
});
});
});

View file

@ -2,8 +2,6 @@
* Unit tests for the raw-OSM-tag PlaceCategory mapper.
*
* Covers the cases Photon and Nominatim emit for typical DACH queries.
* The Pelias mapper has its own tests in category-map.test.ts; this file
* tests *only* the raw-OSM-tag path used by the public-API fallbacks.
*/
import { describe, expect, it } from 'bun:test';
@ -54,7 +52,7 @@ describe('mapOsmTagToPlaceCategory', () => {
expect(mapOsmTagToPlaceCategory('aeroway', 'aerodrome')).toBe('transit');
});
it('amenity:car_rental → transit', () => {
// Matches Pelias mapper's "car_rental → transit" decision
// car_rental → transit (transport-flavored)
expect(mapOsmTagToPlaceCategory('amenity', 'car_rental')).toBe('transit');
});
});
@ -116,7 +114,7 @@ describe('mapOsmTagToPlaceCategory', () => {
describe('other (health/religion/unknown)', () => {
it('amenity:hospital → other', () => {
// Health goes to other (matches Pelias mapper)
// Health goes to other
expect(mapOsmTagToPlaceCategory('amenity', 'hospital')).toBe('other');
});
it('amenity:pharmacy → other', () => {

View file

@ -1,7 +1,7 @@
/**
* Simple in-memory LRU cache with TTL for geocoding results.
* Geocoding results rarely change, so we cache aggressively to
* reduce load on the Pelias instance.
* Geocoding results rarely change, so we cache to reduce load on
* upstream providers.
*/
interface CacheEntry<T> {
@ -37,11 +37,10 @@ export class LRUCache<T> {
/**
* Insert or update a cache entry.
*
* @param ttlOverrideMs Optional per-entry TTL. Useful when results
* from public-API providers should live longer than results from
* the (frequently-changing) local Pelias index — e.g. 7 days for
* Photon/Nominatim answers, 24 hours for Pelias answers. When
* omitted, the constructor's default TTL applies.
* @param ttlOverrideMs Optional per-entry TTL. The route layer uses
* this so public-fallback answers expire faster than local-provider
* answers — see `ttlFor()` in routes/geocode.ts. When omitted, the
* constructor's default TTL applies.
*/
set(key: string, value: T, ttlOverrideMs?: number): void {
// Delete first so re-insert goes to end

View file

@ -1,89 +1,10 @@
/**
* Maps Pelias categories (OSM taxonomy) to our 7 Places categories.
*
* Pelias' openstreetmap importer tags venues with categories from its
* built-in taxonomy (food, retail, transport, health, education, …).
* We collapse those into the simpler Places enum:
* The 7 Places categories used across the geocoding wrapper and clients.
*
* home · work · food · shopping · transit · leisure · other
*
* A venue can have multiple Pelias categories (e.g. a restaurant is
* tagged `['food', 'retail', 'nightlife']`). We pick the most specific
* one in priority order rather than the first — a restaurant should be
* "food" even though "retail" also matches.
* Provider-specific mappers (see `osm-category-map.ts` for Photon /
* Nominatim) collapse the upstream taxonomy into this shape. `home` is
* never auto-detected — it's set manually by the user.
*/
export type PlaceCategory = 'home' | 'work' | 'food' | 'shopping' | 'transit' | 'leisure' | 'other';
/**
* Priority-ordered: first matching category wins. Earlier entries are
* more specific, so "food" beats "retail", "transport" beats "professional".
*/
const PELIAS_PRIORITY: Array<[string, PlaceCategory]> = [
// Food is strongest signal — a restaurant is food, not retail
['food', 'food'],
// Transit/transport
['transport:public', 'transit'],
['transport:air', 'transit'],
['transport:sea', 'transit'],
['transport:bus', 'transit'],
['transport:taxi', 'transit'],
['transport', 'transit'],
// Shopping — explicit retail markers
['retail', 'shopping'],
// Leisure / entertainment / recreation
['entertainment', 'leisure'],
['nightlife', 'leisure'],
['recreation', 'leisure'],
// Work-ish
['education', 'work'],
['professional', 'work'],
['government', 'work'],
['finance', 'work'],
// Health/religion fall through to other
['health', 'other'],
['religion', 'other'],
];
/**
* Derive a PlaceCategory from a Pelias feature's category array.
*
* @param categories The `category` array from a Pelias feature's properties
* @param peliasLayer The Pelias layer (venue, address, street, …) — used as fallback hint
*/
export function mapPeliasToPlaceCategory(
categories?: string[] | null,
peliasLayer?: string
): PlaceCategory {
if (Array.isArray(categories) && categories.length > 0) {
// Walk our priority list and pick the first match
for (const [peliasCat, placeCat] of PELIAS_PRIORITY) {
if (categories.includes(peliasCat)) return placeCat;
}
}
// Fallback: use Pelias layer as a hint. Addresses/streets/regions
// all land in "other" since they aren't really "places" in the
// categorical sense.
if (peliasLayer) {
switch (peliasLayer) {
case 'venue':
return 'other';
case 'address':
case 'street':
return 'other';
case 'neighbourhood':
case 'locality':
case 'region':
case 'country':
return 'other';
}
}
return 'other';
}

View file

@ -2,15 +2,9 @@
* Maps raw OSM `class:type` tags (Photon's `osm_key:osm_value`,
* Nominatim's `class:type`) to our 7 PlaceCategories.
*
* Pelias has a curated multi-category taxonomy (`food`, `retail`,
* `transport`, …) that we map via `category-map.ts`. Photon and Nominatim
* return raw OSM tags instead — `amenity:restaurant`, `shop:supermarket`,
* `public_transport:station`, etc. — so they need a different lookup.
*
* The list below is intentionally narrow: it only covers tags we actually
* see in real Photon/Nominatim responses for DACH queries. Anything else
* falls through to `other`, which matches the Pelias mapper's behavior for
* unknown categories.
* falls through to `other`.
*
* If a query returns a tag we don't handle, that's the signal to add it
* here — not to try to enumerate all 1000+ OSM types.
@ -25,8 +19,8 @@ interface Tag {
/**
* Priority-ordered: first match wins. More-specific entries (with a
* `value`) come before generic key-only entries. Matches Pelias's
* "food beats retail" priority intent.
* `value`) come before generic key-only entries. Same "food beats retail"
* priority intent as the upstream taxonomies.
*/
const OSM_RULES: Array<{ match: Tag; category: PlaceCategory }> = [
// ── Food (highest priority — restaurants are food, even when also
@ -82,7 +76,7 @@ const OSM_RULES: Array<{ match: Tag; category: PlaceCategory }> = [
{ match: { key: 'amenity', value: 'embassy' }, category: 'work' },
{ match: { key: 'office' }, category: 'work' },
// ── Health / religion → other (matches Pelias mapper) ───────────
// ── Health / religion → other ───────────────────────────────────
{ match: { key: 'amenity', value: 'hospital' }, category: 'other' },
{ match: { key: 'amenity', value: 'clinic' }, category: 'other' },
{ match: { key: 'amenity', value: 'doctors' }, category: 'other' },

View file

@ -14,7 +14,7 @@
* not telling Photon "user is at THIS HOUSE". Reverse geocoding
* against the city block instead of the building is acceptable.
*
* Pelias and other LAN-local providers always get the original
* Photon-self and other LAN-local providers always get the original
* full-precision coordinates — quantization only applies on the way
* out to the public internet.
*/

View file

@ -12,7 +12,7 @@
*
* Trade-offs:
* - False positives are OK (a user searching for "Praxis Müller" who
* wanted the dance studio gets 0 results when Pelias is down — not
* wanted the dance studio gets 0 results when photon-self is down — not
* ideal but better than a privacy leak)
* - False negatives are NOT OK (we'd rather over-block than under-block)
* - The list is intentionally narrow: only words with clear medical or

View file

@ -68,7 +68,7 @@ const SEARCH: SearchRequest = { q: 'test', limit: 5, lang: 'de' };
describe('ProviderChain — happy path', () => {
it('returns the first provider that succeeds', async () => {
const a = new FakeProvider('pelias');
const a = new FakeProvider('photon-self');
const b = new FakeProvider('photon');
const chain = new ProviderChain({
providers: [a, b],
@ -76,29 +76,29 @@ describe('ProviderChain — happy path', () => {
});
const res = await chain.search(SEARCH);
expect(res.ok).toBe(true);
expect(res.provider).toBe('pelias');
expect(res.tried).toEqual(['pelias']);
expect(res.provider).toBe('photon-self');
expect(res.tried).toEqual(['photon-self']);
expect(a.calls.search).toBe(1);
expect(b.calls.search).toBe(0);
});
it('honors the providers array order', async () => {
const photon = new FakeProvider('photon');
const pelias = new FakeProvider('pelias');
const local = new FakeProvider('photon-self');
// photon first this time
const chain = new ProviderChain({
providers: [photon, pelias],
providers: [photon, local],
healthCacheMs: 60_000,
});
const res = await chain.search(SEARCH);
expect(res.provider).toBe('photon');
expect(pelias.calls.search).toBe(0);
expect(local.calls.search).toBe(0);
});
});
describe('ProviderChain — failover', () => {
it('falls through on unreachable, returns next provider', async () => {
const a = new FakeProvider('pelias', {
const a = new FakeProvider('photon-self', {
search: async () => ({ ok: false, kind: 'unreachable', status: 503 }),
});
const b = new FakeProvider('photon');
@ -106,7 +106,7 @@ describe('ProviderChain — failover', () => {
const res = await chain.search(SEARCH);
expect(res.ok).toBe(true);
expect(res.provider).toBe('photon');
expect(res.tried).toEqual(['pelias', 'photon']);
expect(res.tried).toEqual(['photon-self', 'photon']);
});
it('falls through on rate_limited', async () => {
@ -121,20 +121,20 @@ describe('ProviderChain — failover', () => {
it('STOPS on empty results — does not consume fallback budget', async () => {
// A clean empty answer is definitive: don't burn through public APIs.
const a = new FakeProvider('pelias', {
const a = new FakeProvider('photon-self', {
search: async () => ({ ok: true, results: [] }),
});
const b = new FakeProvider('photon');
const chain = new ProviderChain({ providers: [a, b], healthCacheMs: 60_000 });
const res = await chain.search(SEARCH);
expect(res.ok).toBe(true);
expect(res.provider).toBe('pelias');
expect(res.provider).toBe('photon-self');
expect(res.results).toEqual([]);
expect(b.calls.search).toBe(0);
});
it('returns ok:false when all providers fail', async () => {
const a = new FakeProvider('pelias', {
const a = new FakeProvider('photon-self', {
search: async () => ({ ok: false, kind: 'unreachable' }),
});
const b = new FakeProvider('photon', {
@ -144,23 +144,23 @@ describe('ProviderChain — failover', () => {
const res = await chain.search(SEARCH);
expect(res.ok).toBe(false);
expect(res.results).toEqual([]);
expect(res.tried).toEqual(['pelias', 'photon']);
expect(res.tried).toEqual(['photon-self', 'photon']);
});
});
describe('ProviderChain — health cache', () => {
it('skips a provider whose health probe returned false', async () => {
const dead = new FakeProvider('pelias', { health: async () => false });
const dead = new FakeProvider('photon-self', { health: async () => false });
const alive = new FakeProvider('photon');
const chain = new ProviderChain({ providers: [dead, alive], healthCacheMs: 60_000 });
const res = await chain.search(SEARCH);
expect(res.tried).toEqual(['photon']); // pelias was skipped, not tried
expect(res.tried).toEqual(['photon']); // local was skipped, not tried
expect(dead.calls.search).toBe(0);
expect(dead.calls.health).toBe(1);
});
it('caches health for healthCacheMs — only one probe per window', async () => {
const a = new FakeProvider('pelias');
const a = new FakeProvider('photon-self');
const chain = new ProviderChain({ providers: [a], healthCacheMs: 60_000 });
await chain.search(SEARCH);
await chain.search(SEARCH);
@ -171,18 +171,19 @@ describe('ProviderChain — health cache', () => {
it('marks provider unhealthy when search fails, skipping it next time', async () => {
let failNext = true;
const flaky = new FakeProvider('pelias', {
search: async () => (failNext ? { ok: false, kind: 'unreachable' } : okResults('pelias')),
const flaky = new FakeProvider('photon-self', {
search: async () =>
failNext ? { ok: false, kind: 'unreachable' } : okResults('photon-self'),
});
const alive = new FakeProvider('photon');
const chain = new ProviderChain({ providers: [flaky, alive], healthCacheMs: 60_000 });
// First call: pelias fails → cached unhealthy → photon serves
// First call: local fails → cached unhealthy → photon serves
const r1 = await chain.search(SEARCH);
expect(r1.provider).toBe('photon');
expect(r1.tried).toEqual(['pelias', 'photon']);
expect(r1.tried).toEqual(['photon-self', 'photon']);
// Second call: pelias is in unhealthy cache, not tried at all
// Second call: local is in unhealthy cache, not tried at all
failNext = false; // would now succeed but never gets called
const r2 = await chain.search(SEARCH);
expect(r2.provider).toBe('photon');
@ -191,7 +192,7 @@ describe('ProviderChain — health cache', () => {
});
it('refreshes health after cache expires', async () => {
const dead = new FakeProvider('pelias', { health: async () => false });
const dead = new FakeProvider('photon-self', { health: async () => false });
const alive = new FakeProvider('photon');
// 1ms cache for fast test
const chain = new ProviderChain({ providers: [dead, alive], healthCacheMs: 1 });
@ -203,7 +204,7 @@ describe('ProviderChain — health cache', () => {
});
it('clearHealthCache forces re-probe', async () => {
const a = new FakeProvider('pelias');
const a = new FakeProvider('photon-self');
const chain = new ProviderChain({ providers: [a], healthCacheMs: 60_000 });
await chain.search(SEARCH);
expect(a.calls.health).toBe(1);
@ -215,19 +216,19 @@ describe('ProviderChain — health cache', () => {
describe('ProviderChain — getHealthSnapshot', () => {
it('reports per-provider health + age', async () => {
const ok = new FakeProvider('pelias');
const ok = new FakeProvider('photon-self');
const dead = new FakeProvider('photon', { health: async () => false });
const chain = new ProviderChain({ providers: [ok, dead], healthCacheMs: 60_000 });
await chain.search(SEARCH);
const snap = chain.getHealthSnapshot();
expect(snap).toHaveLength(2);
expect(snap[0]).toMatchObject({ name: 'pelias', healthy: true });
expect(snap[0]).toMatchObject({ name: 'photon-self', healthy: true });
expect(snap[1]).toMatchObject({ name: 'photon', healthy: false });
expect(snap[0].ageMs).toBeLessThan(1000);
});
it('reports Infinity age for never-probed providers', async () => {
const a = new FakeProvider('pelias');
const a = new FakeProvider('photon-self');
const chain = new ProviderChain({ providers: [a], healthCacheMs: 60_000 });
const snap = chain.getHealthSnapshot();
expect(snap[0].ageMs).toBe(Infinity);
@ -237,7 +238,7 @@ describe('ProviderChain — getHealthSnapshot', () => {
describe('ProviderChain — reverse', () => {
it('uses the same provider order for reverse', async () => {
const a = new FakeProvider('pelias', {
const a = new FakeProvider('photon-self', {
reverse: async () => ({ ok: false, kind: 'unreachable' }),
});
const b = new FakeProvider('photon', { privacy: 'public' });
@ -251,26 +252,26 @@ describe('ProviderChain — reverse', () => {
describe('ProviderChain — privacy / localOnly mode', () => {
it('skips public providers when localOnly is true', async () => {
const localPelias = new FakeProvider('pelias', { privacy: 'local' });
const localProvider = new FakeProvider('photon-self', { privacy: 'local' });
const publicPhoton = new FakeProvider('photon', { privacy: 'public' });
const publicNominatim = new FakeProvider('nominatim', { privacy: 'public' });
const chain = new ProviderChain({
providers: [localPelias, publicPhoton, publicNominatim],
providers: [localProvider, publicPhoton, publicNominatim],
healthCacheMs: 60_000,
});
const res = await chain.search(SEARCH, undefined, { localOnly: true });
expect(res.ok).toBe(true);
expect(res.provider).toBe('pelias');
expect(localPelias.calls.search).toBe(1);
expect(res.provider).toBe('photon-self');
expect(localProvider.calls.search).toBe(1);
// Public providers must not even have their search() called
expect(publicPhoton.calls.search).toBe(0);
expect(publicNominatim.calls.search).toBe(0);
});
it('falls back to the second LOCAL provider when the first local fails', async () => {
const local1 = new FakeProvider('pelias', {
const local1 = new FakeProvider('photon-self', {
privacy: 'local',
search: async () => ({ ok: false, kind: 'unreachable' }),
});
@ -313,7 +314,7 @@ describe('ProviderChain — privacy / localOnly mode', () => {
});
it('returns notice: fallback_used when a public provider serves a non-sensitive query', async () => {
const localDown = new FakeProvider('pelias', {
const localDown = new FakeProvider('photon-self', {
privacy: 'local',
health: async () => false,
});
@ -329,10 +330,10 @@ describe('ProviderChain — privacy / localOnly mode', () => {
});
it('NO notice when the local provider serves a non-sensitive query', async () => {
const localUp = new FakeProvider('pelias', { privacy: 'local' });
const localUp = new FakeProvider('photon-self', { privacy: 'local' });
const chain = new ProviderChain({ providers: [localUp], healthCacheMs: 60_000 });
const res = await chain.search(SEARCH);
expect(res.provider).toBe('pelias');
expect(res.provider).toBe('photon-self');
expect(res.notice).toBeUndefined();
});
});

View file

@ -1,7 +1,7 @@
/**
* Tests for normalizing Nominatim's flat-JSON shape into our GeocodingResult.
*
* Nominatim differs from Photon/Pelias in three subtle ways we lock in:
* Nominatim differs from Photon in three subtle ways we lock in:
* 1. Lat/lon are STRINGS, not numbers — the normalizer must parseFloat.
* 2. Display name is a comma-noisy hierarchy ("Konzil, Hafenstraße,
* Konstanz, Konstanz, Regierungsbezirk Freiburg, Baden-Württemberg,
@ -135,16 +135,4 @@ describe('normalizeNominatimResult', () => {
});
expect(result.provider).toBe('nominatim');
});
it('does not set peliasCategories', () => {
// Consumer side keys off the absence of this field as a "fallback
// provider" signal.
const result = normalizeNominatimResult({
lat: '47.0',
lon: '9.0',
class: 'amenity',
type: 'restaurant',
});
expect(result.peliasCategories).toBeUndefined();
});
});

View file

@ -44,8 +44,6 @@ describe('normalizePhotonFeature', () => {
});
expect(result.confidence).toBeCloseTo(0.78, 2);
expect(result.provider).toBe('photon');
// peliasCategories deliberately absent for non-Pelias providers
expect(result.peliasCategories).toBeUndefined();
});
it('builds label from structured fields', () => {
@ -111,7 +109,7 @@ describe('normalizePhotonFeature', () => {
});
it('coordinates: Photon emits [lon, lat] — normalizer must NOT swap', () => {
// Catches the all-too-easy lon/lat flip when porting from Pelias.
// Catches the all-too-easy lon/lat flip in Photon's GeoJSON.
const result = normalizePhotonFeature({
type: 'Feature',
geometry: { type: 'Point', coordinates: [9.1758, 47.6634] },

View file

@ -47,7 +47,7 @@ export type ChainNotice =
/** Sensitive query was blocked from public providers and no local
* provider was healthy — no results, but the absence is intentional. */
| 'sensitive_local_unavailable'
/** A non-Pelias provider served the request (Pelias was down). */
/** A public provider served the request (the local provider was down). */
| 'fallback_used';
export interface ChainOptions {
@ -161,9 +161,9 @@ export class ProviderChain {
}
// Stale or missing — refresh. We don't await this aggressively in
// happy paths (Pelias up + healthy is the cheapest case), but on
// cold-start every entry is missing so the first request pays for
// one health probe per provider.
// happy paths (photon-self up + healthy is the cheapest case),
// but on cold-start every entry is missing so the first request
// pays for one health probe per provider.
const healthy = await provider.health(signal);
this.health.set(provider.name, { healthy, checkedAt: now });
if (!healthy) {

View file

@ -6,9 +6,9 @@
* search/reverse. A custom `User-Agent` is required (Nominatim returns
* 403 to default-UA fetches).
*
* Compared to Pelias/Photon, Nominatim returns a single flat array
* rather than GeoJSON. We adapt the shape and synthesize a confidence
* score from `importance`.
* Unlike Photon, Nominatim returns a single flat array rather than
* GeoJSON. We adapt the shape and synthesize a confidence score from
* `importance`.
*
* https://nominatim.org/release-docs/develop/api/Search/
* https://operations.osmfoundation.org/policies/nominatim/

View file

@ -1,178 +0,0 @@
/**
* Pelias provider — primary backend, self-hosted with the DACH OSM index.
*
* Forward-search uses /autocomplete first (fast venue match) and falls
* back to /search if autocomplete returns zero features (autocomplete
* deliberately excludes the address layer for perf).
*/
import { mapPeliasToPlaceCategory } from '../lib/category-map';
import type {
GeocodingProvider,
GeocodingResult,
ProviderResponse,
ReverseRequest,
SearchRequest,
} from './types';
/** Connection settings consumed by `PeliasProvider`. */
export interface PeliasConfig {
/** Base URL of the Pelias API; request paths such as `/autocomplete`,
 * `/search`, `/reverse` and `/status` are appended to it verbatim. */
apiUrl: string;
/** Per-request timeout in milliseconds, passed to `AbortSignal.timeout`. */
timeoutMs: number;
}
/**
 * Geocoding provider that queries a Pelias HTTP API rooted at
 * `config.apiUrl`. Declared with `privacy: 'local'`.
 *
 * `search` and `reverse` never throw for transport problems: a non-OK
 * HTTP status or a thrown fetch/JSON error is reported as
 * `{ ok: false, kind: 'unreachable', … }`. Every request is bounded by
 * `config.timeoutMs` in addition to the caller-supplied signal.
 */
export class PeliasProvider implements GeocodingProvider {
  readonly name = 'pelias' as const;
  readonly privacy = 'local' as const;

  constructor(private readonly config: PeliasConfig) {}

  /**
   * Forward geocoding. Tries `/autocomplete` first and only queries
   * `/search` when autocomplete answered OK with zero features.
   *
   * @param req    Query text, result limit, language and optional focus point.
   * @param signal Optional external abort signal, combined with the timeout.
   * @returns Normalized results, or an `unreachable` failure.
   */
  async search(req: SearchRequest, signal?: AbortSignal): Promise<ProviderResponse> {
    const query = new URLSearchParams({
      text: req.q.trim(),
      size: String(req.limit),
      lang: req.lang,
    });
    // The focus point is only sent when both coordinates are present.
    if (req.focusLat && req.focusLon) {
      query.set('focus.point.lat', req.focusLat);
      query.set('focus.point.lon', req.focusLon);
    }
    const qs = query.toString();

    try {
      const autocomplete = await this.fetch(`/autocomplete?${qs}`, signal);
      if (!autocomplete.ok) {
        return { ok: false, kind: 'unreachable', status: autocomplete.status };
      }
      if (autocomplete.features.length > 0) {
        return { ok: true, results: autocomplete.features.map(normalizePeliasFeature) };
      }

      // Autocomplete was reachable but empty: give /search one attempt.
      // A non-OK /search is treated as a clean zero-results answer rather
      // than a failure, because the autocomplete probe already succeeded.
      const fullSearch = await this.fetch(`/search?${qs}`, signal);
      const features = fullSearch.ok ? fullSearch.features : autocomplete.features;
      return { ok: true, results: features.map(normalizePeliasFeature) };
    } catch (err) {
      return { ok: false, kind: 'unreachable', error: errorMessage(err) };
    }
  }

  /**
   * Reverse geocoding via `/reverse`, asking for at most three features.
   *
   * @param req    Coordinates and language.
   * @param signal Optional external abort signal, combined with the timeout.
   * @returns Normalized results, or an `unreachable` failure.
   */
  async reverse(req: ReverseRequest, signal?: AbortSignal): Promise<ProviderResponse> {
    const query = new URLSearchParams({
      'point.lat': req.lat,
      'point.lon': req.lon,
      size: '3',
      lang: req.lang,
    });

    try {
      const answer = await this.fetch(`/reverse?${query.toString()}`, signal);
      if (answer.ok) {
        return { ok: true, results: answer.features.map(normalizePeliasFeature) };
      }
      return { ok: false, kind: 'unreachable', status: answer.status };
    } catch (err) {
      return { ok: false, kind: 'unreachable', error: errorMessage(err) };
    }
  }

  /**
   * Liveness probe against `/status`.
   *
   * @returns `true` for any 2xx response and for 404 (the status route
   *          does not exist on every Pelias version, but a 404 still
   *          proves the server is answering); `false` for every other
   *          status and for any thrown error.
   */
  async health(signal?: AbortSignal): Promise<boolean> {
    try {
      const statusUrl = `${this.config.apiUrl}/status`;
      const bounded = combineSignals(signal, AbortSignal.timeout(this.config.timeoutMs));
      const res = await fetch(statusUrl, { signal: bounded });
      return res.status === 404 || res.ok;
    } catch {
      return false;
    }
  }

  /**
   * Issue a GET against `apiUrl + path` and extract the feature list.
   * Non-OK responses yield `ok: false` with an empty list; the body is
   * only parsed for OK responses. Network and JSON errors propagate to
   * the caller.
   */
  private async fetch(
    path: string,
    signal?: AbortSignal
  ): Promise<{ ok: boolean; status: number; features: PeliasFeature[] }> {
    const bounded = combineSignals(signal, AbortSignal.timeout(this.config.timeoutMs));
    const res = await fetch(`${this.config.apiUrl}${path}`, { signal: bounded });
    const { status } = res;
    if (!res.ok) {
      return { ok: false, status, features: [] };
    }
    const payload = (await res.json()) as PeliasResponse;
    return { ok: true, status, features: payload.features ?? [] };
  }
}
// --- Pelias native types ---
/** Shape the provider casts Pelias JSON response bodies to; only
 * `features` is read by this module. */
interface PeliasResponse {
type: 'FeatureCollection';
features: PeliasFeature[];
}
/** A single GeoJSON point feature as consumed by
 * `normalizePeliasFeature`. All `properties` fields are optional. */
interface PeliasFeature {
type: 'Feature';
geometry: {
type: 'Point';
coordinates: [number, number]; // [lon, lat]
};
properties: {
id?: string;
name?: string;
// Preferred display label; the normalizer falls back to `name`.
label?: string;
// Copied to the result as-is; the normalizer defaults it to 0 when absent.
confidence?: number;
// Passed together with `category` to the Places category mapping.
layer?: string;
street?: string;
housenumber?: string;
postalcode?: string;
locality?: string;
region?: string;
country?: string;
// Raw category tags; exposed on the result as `peliasCategories`.
category?: string[];
};
}
/**
 * Convert one Pelias GeoJSON feature into the provider-neutral
 * `GeocodingResult`.
 *
 * - Coordinates arrive as `[lon, lat]` and are unpacked in that order.
 * - `label` falls back to `name`, then to the empty string.
 * - `confidence` defaults to 0 when Pelias did not supply one.
 *
 * @param feature Feature taken from a Pelias response.
 * @returns The normalized result, tagged with `provider: 'pelias'`.
 */
export function normalizePeliasFeature(feature: PeliasFeature): GeocodingResult {
  const { geometry, properties } = feature;
  const longitude = geometry.coordinates[0];
  const latitude = geometry.coordinates[1];
  const name = properties.name || '';

  return {
    label: properties.label || name,
    name,
    latitude,
    longitude,
    address: {
      street: properties.street,
      houseNumber: properties.housenumber,
      postalCode: properties.postalcode,
      city: properties.locality,
      state: properties.region,
      country: properties.country,
    },
    category: mapPeliasToPlaceCategory(properties.category, properties.layer),
    peliasCategories: properties.category,
    confidence: properties.confidence ?? 0,
    provider: 'pelias',
  };
}
/**
 * Produce a printable message for a caught value.
 *
 * @param e Anything that was thrown.
 * @returns `e.message` for `Error` instances, otherwise `String(e)`.
 */
function errorMessage(e: unknown): string {
  if (e instanceof Error) {
    return e.message;
  }
  return String(e);
}
/**
 * Combine an external AbortSignal with our own timeout signal.
 *
 * `undefined` entries are ignored. With exactly one real signal that
 * signal is returned as-is. Otherwise the returned signal aborts as soon
 * as any input aborts, carrying that input's `reason`; if an input is
 * already aborted, the result is aborted immediately.
 *
 * The runtime's native `AbortSignal.any` is used when present. It is
 * feature-detected through a cast because its TS typing is patchy across
 * runtimes. The manual fallback detaches its `abort` listeners from every
 * source once the combined signal fires, and attaches none at all when an
 * input is already aborted, so a long-lived caller signal does not keep
 * one stale listener per combined signal that has already fired.
 *
 * @param signals Signals to merge; `undefined` entries are skipped.
 * @returns A signal that reflects the first abort among the inputs.
 */
function combineSignals(...signals: Array<AbortSignal | undefined>): AbortSignal {
  const real = signals.filter((s): s is AbortSignal => !!s);
  if (real.length === 1) return real[0];

  const nativeAny = (
    AbortSignal as typeof AbortSignal & { any?: (sources: AbortSignal[]) => AbortSignal }
  ).any;
  if (typeof nativeAny === 'function') {
    return nativeAny.call(AbortSignal, real);
  }

  const ctrl = new AbortController();

  // Check for an already-aborted input BEFORE attaching any listener, so
  // nothing is left behind on the other inputs.
  const alreadyAborted = real.find((s) => s.aborted);
  if (alreadyAborted) {
    ctrl.abort(alreadyAborted.reason);
    return ctrl.signal;
  }

  const detachers: Array<() => void> = [];
  for (const s of real) {
    const onAbort = (): void => {
      // First abort wins: drop every listener, then forward the reason.
      for (const detach of detachers) detach();
      ctrl.abort(s.reason);
    };
    s.addEventListener('abort', onAbort, { once: true });
    detachers.push(() => s.removeEventListener('abort', onAbort));
  }
  return ctrl.signal;
}

View file

@ -5,15 +5,10 @@
* importer). The HTTP shape is GeoJSON FeatureCollection with `properties`
* holding `osm_key`/`osm_value` raw OSM tags + structured address fields.
*
* Compared to Pelias:
* + No rate limit advertised, but be a polite neighbor: short timeouts,
* no retries, cache aggressively.
* + Reverse geocoding takes lon/lat (note the order — different from
* Pelias's point.lat/point.lon). Easy to flip if not careful.
* - No `confidence` field. We approximate from `importance` (0–1) when
* present, else 0.5 as a neutral default.
* - No DACH-specific tuning — German venue names sometimes lose umlauts
* in display labels. Acceptable for a fallback.
* Same class powers both `photon-self` (self-hosted, privacy: 'local')
* and `photon` (public komoot.io, privacy: 'public'). Reverse-geocoding
* takes lon/lat (note the order). Confidence is approximated from
* `importance` (0–1) when present, else 0.5 as a neutral default.
*/
import { mapOsmTagToPlaceCategory } from '../lib/osm-category-map';
@ -207,9 +202,6 @@ export function normalizePhotonFeature(
country: props.country,
},
category,
// peliasCategories deliberately omitted — Photon has osm_key:osm_value
// but the consumer side keys off the absence of this field as a
// "result came from a fallback" signal.
confidence: typeof props.importance === 'number' ? props.importance : 0.5,
provider: providerName,
};

View file

@ -29,12 +29,8 @@ export interface GeocodingResult {
};
/** Our Places category, derived from the provider's native taxonomy. */
category: PlaceCategory;
/** Raw Pelias categories (food, retail, transport, …) — only present
* when the result came from Pelias. Photon/Nominatim don't have an
* equivalent multi-tag taxonomy. */
peliasCategories?: string[];
/** Confidence score 0–1. Pelias provides this natively; Photon/Nominatim
* approximate it from `importance`. */
/** Confidence score 0–1. Photon/Nominatim approximate it from
* `importance`. */
confidence: number;
/** Which provider answered — useful for telemetry + UI hints
* ("approximate match" badge for fallback providers). */
@ -42,8 +38,8 @@ export interface GeocodingResult {
}
/**
* Provider identifiers. Two of these wrap the same `PhotonProvider`
* class with different configs:
* Provider identifiers. `photon-self` and `photon` both wrap the same
* `PhotonProvider` class with different configs:
*
* - `photon-self`: self-hosted Photon (typically on mana-gpu),
* `privacy: 'local'`. Eligible for sensitive queries.
@ -55,7 +51,7 @@ export interface GeocodingResult {
* tracks per-provider health. A single `photon` slot can't simultaneously
* mean two different backends.
*/
export type ProviderName = 'pelias' | 'photon-self' | 'photon' | 'nominatim';
export type ProviderName = 'photon-self' | 'photon' | 'nominatim';
export interface SearchRequest {
q: string;

View file

@ -21,10 +21,10 @@ interface CachedAnswer {
}
/**
* TTL chooser. Public-API results (Photon/Nominatim) get the longer TTL —
* caching aggressively is the main privacy lever once the query has
* already left our network. Local results stay on the shorter TTL because
* the Pelias index can be re-imported; we don't want stale local data.
* TTL chooser. Public-API results (photon/nominatim) get a shorter TTL
* (1h) so a transient blip in photon-self doesn't pin stale fallback
* answers in the cache for days. Local results (photon-self) get the
* longer 24h TTL.
*
* Sensitive-query notices are cached on the short TTL too (the user might
* retry from a different angle quickly), and `undefined` provider (chain

View file

@ -9,35 +9,43 @@ export function createHealthRoutes(config: Config, chain: ProviderChain) {
app.get('/', (c) => c.json({ status: 'ok', service: 'mana-geocoding' }));
/**
* Upstream Pelias health. Proxies a request to the Pelias API so
* monitoring can reach it without `extra_hosts: host.docker.internal`
* on the blackbox exporter.
* Upstream photon-self health. Proxies a request to the self-hosted
* Photon so monitoring can reach it without `extra_hosts:
* host.docker.internal` on the blackbox exporter.
*
* Backwards-compatible: existing prometheus probes against this
* endpoint keep working. Now reports `degraded` (200) instead of `down`
* (503) when Pelias is unreachable but a fallback provider is healthy —
* the system can still serve queries, just slower / less precise.
* Reports `degraded` (200) instead of `down` (503) when photon-self is
* unreachable but a public fallback (photon / nominatim) is healthy —
* the system can still serve queries, just at the cost of leaking the
* query content to a third party.
*/
app.get('/pelias', async (c) => {
app.get('/photon-self', async (c) => {
const upstream = config.photonSelf.apiUrl;
if (!upstream) {
return c.json({ status: 'unconfigured', error: 'PHOTON_SELF_API_URL is unset' }, 503);
}
try {
const res = await fetch(`${config.pelias.apiUrl}/status`, {
const res = await fetch(`${upstream}/api?q=Konstanz&limit=1`, {
signal: AbortSignal.timeout(5000),
});
if (!res.ok && res.status !== 404) {
if (!res.ok) {
return c.json(
{ status: 'degraded', upstream: res.status, fallbackAvailable: chainHasFallback(chain) },
chainHasFallback(chain) ? 200 : 503
{
status: 'degraded',
upstream: res.status,
fallbackAvailable: chainHasPublicFallback(chain),
},
chainHasPublicFallback(chain) ? 200 : 503
);
}
return c.json({ status: 'ok', upstream: 'pelias-api' });
return c.json({ status: 'ok', upstream: 'photon-self' });
} catch (e) {
return c.json(
{
status: chainHasFallback(chain) ? 'degraded' : 'down',
status: chainHasPublicFallback(chain) ? 'degraded' : 'down',
error: e instanceof Error ? e.message : 'unknown',
fallbackAvailable: chainHasFallback(chain),
fallbackAvailable: chainHasPublicFallback(chain),
},
chainHasFallback(chain) ? 200 : 503
chainHasPublicFallback(chain) ? 200 : 503
);
}
});
@ -56,10 +64,10 @@ export function createHealthRoutes(config: Config, chain: ProviderChain) {
}
/**
* Check if any non-Pelias provider is currently believed healthy. Used
* to soften /pelias health to "degraded" instead of "down" when a
* fallback can still serve traffic.
* Check if any public fallback provider is currently believed healthy.
* Used to soften /photon-self health to "degraded" instead of "down"
* when a public fallback can still serve traffic.
*/
function chainHasFallback(chain: ProviderChain): boolean {
return chain.getHealthSnapshot().some((p) => p.name !== 'pelias' && p.healthy);
function chainHasPublicFallback(chain: ProviderChain): boolean {
return chain.getHealthSnapshot().some((p) => p.name !== 'photon-self' && p.healthy);
}