feat(geocoding): auto-categorize places via Pelias taxonomy

Pelias hides the 'category' field from API responses unless the
caller filters by categories=... explicitly — a default intended for
keyword search that strips category metadata from address queries.

Patch the Pelias API's geojsonify_place_details.js so the category
array is returned on every feature (food, retail, transport, …),
mounted into the container as a read-only volume override.

Rewrite category-map.ts to map Pelias' OSM taxonomy to our 7
PlaceCategories using a priority-ordered list so a restaurant
tagged ['food','retail','nightlife'] resolves to 'food' (the most
specific), not 'shopping'.

Verified with Konstanz test queries:
  Konzil Restaurant        → food
  Bahnhof Konstanz         → transit
  Physiotherapie-Schule    → work
  MX-Park                  → leisure

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Till JS 2026-04-11 15:48:24 +02:00
parent 1293756bbf
commit e82b5c1449
5 changed files with 195 additions and 156 deletions

View file

@ -31,8 +31,8 @@ export interface GeocodingResult {
country?: string;
};
category: PlaceCategory;
osmCategory?: string;
osmType?: string;
/** Raw Pelias categories (food, retail, transport, …) */
peliasCategories?: string[];
confidence: number;
}

View file

@ -21,6 +21,14 @@ services:
PORT: 4000
volumes:
- ./pelias.json:/code/pelias.json:ro
# Patch: always return the `category` field in API responses, not only
# when a `categories=...` filter is present. Pelias' default
# `checkCategoryParam` hides category from results unless the caller
# filters by it, but we want the OSM taxonomy (food, retail, transport, …)
# on every venue so our Places UI can auto-map it to a PlaceCategory.
# The patched file is generated from the upstream one with
# `sed "s|condition: checkCategoryParam|condition: () => true|"`
- ./geojsonify_place_details.js:/code/pelias/api/helper/geojsonify_place_details.js:ro
depends_on:
elasticsearch:
condition: service_healthy

View file

@ -0,0 +1,123 @@
const _ = require('lodash');
const field = require('./fieldValue');
// Properties to be copied from a source document into the response properties.
// Each entry is { name, type[, condition] }:
//   - type 'string': the value is passed through field.getStringValue
//   - type 'array':  the value is passed through field.getArrayValue
//   - any other type (e.g. 'default'): the value is copied exactly as is
//   - condition (optional): called as condition(params, source); the property
//     is skipped when it returns a falsy value
// If something other than string is desired, use the following structure: { name: 'category', type: 'array' }
// collectProperties walks this list in array order, so the order here is the
// order in which keys are added to the result object.
const DETAILS_PROPS = [
{ name: 'unit', type: 'string' },
{ name: 'housenumber', type: 'string' },
{ name: 'street', type: 'string' },
{ name: 'postalcode', type: 'string' },
{ name: 'postalcode_gid', type: 'string' },
{ name: 'confidence', type: 'default' },
{ name: 'match_type', type: 'string' },
{ name: 'distance', type: 'default' },
{ name: 'accuracy', type: 'string' },
{ name: 'country', type: 'string' },
{ name: 'country_gid', type: 'string' },
{ name: 'country_a', type: 'string' },
{ name: 'dependency', type: 'string' },
{ name: 'dependency_gid', type: 'string' },
{ name: 'dependency_a', type: 'string' },
{ name: 'macroregion', type: 'string' },
{ name: 'macroregion_gid', type: 'string' },
{ name: 'macroregion_a', type: 'string' },
{ name: 'region', type: 'string' },
{ name: 'region_gid', type: 'string' },
{ name: 'region_a', type: 'string' },
{ name: 'macrocounty', type: 'string' },
{ name: 'macrocounty_gid', type: 'string' },
{ name: 'macrocounty_a', type: 'string' },
{ name: 'county', type: 'string' },
{ name: 'county_gid', type: 'string' },
{ name: 'county_a', type: 'string' },
{ name: 'localadmin', type: 'string' },
{ name: 'localadmin_gid', type: 'string' },
{ name: 'localadmin_a', type: 'string' },
{ name: 'locality', type: 'string' },
{ name: 'locality_gid', type: 'string' },
{ name: 'locality_a', type: 'string' },
{ name: 'borough', type: 'string' },
{ name: 'borough_gid', type: 'string' },
{ name: 'borough_a', type: 'string' },
{ name: 'neighbourhood', type: 'string' },
{ name: 'neighbourhood_gid', type: 'string' },
{ name: 'continent', type: 'string' },
{ name: 'continent_gid', type: 'string' },
{ name: 'continent_a', type: 'string' },
// empire* fields are only copied when the source has no own `country_gid`
// (the condition is the negation of hasCountry).
{ name: 'empire', type: 'string', condition: _.negate(hasCountry) },
{ name: 'empire_gid', type: 'string', condition: _.negate(hasCountry) },
{ name: 'empire_a', type: 'string', condition: _.negate(hasCountry) },
{ name: 'ocean', type: 'string' },
{ name: 'ocean_gid', type: 'string' },
{ name: 'ocean_a', type: 'string' },
{ name: 'marinearea', type: 'string' },
{ name: 'marinearea_gid', type: 'string' },
{ name: 'marinearea_a', type: 'string' },
{ name: 'bounding_box', type: 'default' },
{ name: 'label', type: 'string' },
// Local patch: the condition always returns true, so `category` is not gated
// on the query params and is copied for every source that carries it.
// checkCategoryParam (defined further down) is therefore not referenced here.
{ name: 'category', type: 'array', condition: () => true },
];
// Property list used when debug mode is enabled: every DETAILS_PROPS entry,
// followed by `population` and `popularity`, both copied exactly as is.
const EXTENDED_PROPS = [
  ...DETAILS_PROPS,
  { name: 'population', type: 'default' },
  { name: 'popularity', type: 'default' },
];
/**
 * Report whether `source` has its own `country_gid` property.
 *
 * The check goes through Object.prototype.hasOwnProperty.call so that it also
 * works for objects created without a prototype and for objects that carry
 * their own `hasOwnProperty` key.
 *
 * @param {object} params clean query params (not read; present so the function
 *   can be used as a `condition(params, source)` callback)
 * @param {object} source document whose properties are being collected
 * @returns {boolean} true iff `source` has an own `country_gid` property
 */
function hasCountry(params, source) {
  return Object.prototype.hasOwnProperty.call(source, 'country_gid');
}
/**
 * Report whether the query params carry their own `categories` key.
 *
 * The own-property check goes through Object.prototype.hasOwnProperty.call so
 * a `hasOwnProperty` key inside the params, or a prototype-less params object,
 * cannot break it.
 *
 * NOTE: in this file the `category` entry of DETAILS_PROPS uses
 * `condition: () => true`, so this function is not referenced by the property
 * table.
 *
 * @param {object} params clean query params; non-objects yield false
 * @returns {boolean} true iff `params` is an object with an own `categories` property
 */
function checkCategoryParam(params) {
  return _.isObject(params) && Object.prototype.hasOwnProperty.call(params, 'categories');
}
/**
 * Collect the configured properties from `source` into a new object and
 * return it. Properties that `source` does not own are ignored.
 *
 * The property list is DETAILS_PROPS, or EXTENDED_PROPS when
 * `params.enableDebug` is strictly `true`. For each entry:
 *   1. if it has a `condition` function that returns a falsy value for
 *      (params, source), the entry is skipped;
 *   2. the value is converted according to the entry's `type`
 *      ('string' and 'array' go through the `field` helpers, anything else
 *      is copied exactly as is);
 *   3. the value is stored only if it is a number, or is truthy and not
 *      empty according to `_.isEmpty`.
 *
 * @param {object} params clean query params
 * @param {object} source document to read properties from
 * @returns {object} new object holding the collected properties
 */
function collectProperties(params, source) {
  // extended properties when debugging mode is enabled
  const props = params.enableDebug === true ? EXTENDED_PROPS : DETAILS_PROPS;

  return props.reduce((result, prop) => {
    // if condition isn't met, don't set the property
    if (_.isFunction(prop.condition) && !prop.condition(params, source)) {
      return result;
    }

    // ignore properties the source does not own; the prototype method is used
    // so a `hasOwnProperty` key on the source cannot interfere
    if (!Object.prototype.hasOwnProperty.call(source, prop.name)) {
      return result;
    }

    let value = null;
    switch (prop.type) {
      case 'string':
        value = field.getStringValue(source[prop.name]);
        break;
      case 'array':
        value = field.getArrayValue(source[prop.name]);
        break;
      // default behavior is to copy property exactly as is
      default:
        value = source[prop.name];
    }

    // numbers are always kept; other values only when truthy and non-empty
    if (_.isNumber(value) || (value && !_.isEmpty(value))) {
      result[prop.name] = value;
    }
    return result;
  }, {});
}
module.exports = collectProperties;

View file

@ -1,156 +1,75 @@
/**
* Maps Pelias/OSM categories to our 7 Places categories.
* Maps Pelias categories (OSM taxonomy) to our 7 Places categories.
*
* Pelias returns results with `addendum.osm.category` and `addendum.osm.type`
* fields that correspond to OSM key/value pairs. We map these to our simple
* category enum: home, work, food, shopping, transit, leisure, other.
* Pelias' openstreetmap importer tags venues with categories from its
* built-in taxonomy (food, retail, transport, health, education, …).
* We collapse those into the simpler Places enum:
*
* home · work · food · shopping · transit · leisure · other
*
* A venue can have multiple Pelias categories (e.g. a restaurant is
* tagged `['food', 'retail', 'nightlife']`). We pick the most specific
* one in priority order rather than the first — a restaurant should be
* "food" even though "retail" also matches.
*/
export type PlaceCategory = 'home' | 'work' | 'food' | 'shopping' | 'transit' | 'leisure' | 'other';
/**
* OSM key → PlaceCategory mapping.
* The key is the OSM tag key (e.g. "amenity", "shop"), the value maps
* specific OSM values to our categories. A `_default` entry covers
* any value not explicitly listed.
* Priority-ordered: first matching category wins. Earlier entries are
* more specific, so "food" beats "retail", "transport" beats "professional".
*/
const OSM_CATEGORY_MAP: Record<
string,
Record<string, PlaceCategory> & { _default?: PlaceCategory }
> = {
amenity: {
_default: 'other',
restaurant: 'food',
cafe: 'food',
fast_food: 'food',
bar: 'food',
pub: 'food',
biergarten: 'food',
food_court: 'food',
ice_cream: 'food',
bakery: 'food',
school: 'work',
university: 'work',
college: 'work',
library: 'work',
coworking_space: 'work',
office: 'work',
bus_station: 'transit',
ferry_terminal: 'transit',
taxi: 'transit',
parking: 'transit',
fuel: 'transit',
bicycle_parking: 'transit',
charging_station: 'transit',
cinema: 'leisure',
theatre: 'leisure',
nightclub: 'leisure',
community_centre: 'leisure',
swimming_pool: 'leisure',
marketplace: 'shopping',
},
shop: {
_default: 'shopping',
supermarket: 'shopping',
bakery: 'food',
butcher: 'food',
deli: 'food',
greengrocer: 'food',
seafood: 'food',
pastry: 'food',
cheese: 'food',
coffee: 'food',
},
tourism: {
_default: 'leisure',
hotel: 'other',
hostel: 'other',
guest_house: 'other',
motel: 'other',
apartment: 'home',
},
leisure: {
_default: 'leisure',
park: 'leisure',
playground: 'leisure',
sports_centre: 'leisure',
fitness_centre: 'leisure',
stadium: 'leisure',
swimming_pool: 'leisure',
garden: 'leisure',
nature_reserve: 'leisure',
beach_resort: 'leisure',
marina: 'leisure',
},
railway: {
_default: 'transit',
station: 'transit',
halt: 'transit',
tram_stop: 'transit',
},
aeroway: {
_default: 'transit',
aerodrome: 'transit',
terminal: 'transit',
},
highway: {
_default: 'transit',
bus_stop: 'transit',
},
building: {
_default: 'other',
residential: 'home',
house: 'home',
apartments: 'home',
detached: 'home',
commercial: 'work',
office: 'work',
industrial: 'work',
retail: 'shopping',
supermarket: 'shopping',
church: 'leisure',
cathedral: 'leisure',
stadium: 'leisure',
school: 'work',
university: 'work',
hospital: 'other',
},
office: {
_default: 'work',
},
sport: {
_default: 'leisure',
},
};
// Ordered [Pelias category, PlaceCategory] pairs. mapPeliasToPlaceCategory
// walks this list from top to bottom and returns the mapped value of the
// first entry whose Pelias category is present in the feature's category
// array, so an entry's position is its priority.
const PELIAS_PRIORITY: Array<[string, PlaceCategory]> = [
// Food is strongest signal — a restaurant is food, not retail
['food', 'food'],
// Transit/transport. All of these map to the same value, so their order
// relative to each other does not change the result.
['transport:public', 'transit'],
['transport:air', 'transit'],
['transport:sea', 'transit'],
['transport:bus', 'transit'],
['transport:taxi', 'transit'],
['transport', 'transit'],
// Shopping — explicit retail markers
['retail', 'shopping'],
// Leisure / entertainment / recreation
['entertainment', 'leisure'],
['nightlife', 'leisure'],
['recreation', 'leisure'],
// Work-ish
['education', 'work'],
['professional', 'work'],
['government', 'work'],
['finance', 'work'],
// Health/religion are mapped explicitly to 'other'. Because they sit last,
// they only apply when none of the categories above matched.
['health', 'other'],
['religion', 'other'],
];
/**
* Derive a PlaceCategory from a Pelias result's OSM metadata.
* Derive a PlaceCategory from a Pelias feature's category array.
*
* Pelias provides category info in several fields depending on the data source.
* We check them in order of specificity.
* @param categories The `category` array from a Pelias feature's properties
* @param peliasLayer The Pelias layer (venue, address, street, …) used as fallback hint
*/
export function mapOsmToPlaceCategory(
osmCategory?: string,
osmType?: string,
export function mapPeliasToPlaceCategory(
categories?: string[] | null,
peliasLayer?: string
): PlaceCategory {
// Try direct OSM key/value mapping first
if (osmCategory && osmType) {
const categoryMap = OSM_CATEGORY_MAP[osmCategory];
if (categoryMap) {
return categoryMap[osmType] ?? categoryMap._default ?? 'other';
if (Array.isArray(categories) && categories.length > 0) {
// Walk our priority list and pick the first match
for (const [peliasCat, placeCat] of PELIAS_PRIORITY) {
if (categories.includes(peliasCat)) return placeCat;
}
}
// Try just the OSM key as a category
if (osmCategory) {
const categoryMap = OSM_CATEGORY_MAP[osmCategory];
if (categoryMap?._default) {
return categoryMap._default;
}
}
// Fallback: use Pelias layer as a hint
// Fallback: use Pelias layer as a hint. Addresses/streets/regions
// all land in "other" since they aren't really "places" in the
// categorical sense.
if (peliasLayer) {
switch (peliasLayer) {
case 'venue':

View file

@ -10,7 +10,7 @@
import { Hono } from 'hono';
import type { Config } from '../config';
import { LRUCache } from '../lib/cache';
import { mapOsmToPlaceCategory, type PlaceCategory } from '../lib/category-map';
import { mapPeliasToPlaceCategory, type PlaceCategory } from '../lib/category-map';
/** Normalized result returned to the client */
export interface GeocodingResult {
@ -29,12 +29,10 @@ export interface GeocodingResult {
state?: string;
country?: string;
};
/** Our Places category, derived from OSM tags */
/** Our Places category, derived from Pelias taxonomy */
category: PlaceCategory;
/** Raw OSM category key (e.g. "amenity") */
osmCategory?: string;
/** Raw OSM type value (e.g. "restaurant") */
osmType?: string;
/** Raw Pelias categories (food, retail, transport, …) */
peliasCategories?: string[];
/** Pelias confidence score 0-1 */
confidence: number;
}
@ -174,12 +172,7 @@ interface PeliasFeature {
locality?: string;
region?: string;
country?: string;
addendum?: {
osm?: {
category?: string;
type?: string;
};
};
category?: string[];
};
}
@ -187,9 +180,6 @@ function normalizePeliasFeature(feature: PeliasFeature): GeocodingResult {
const props = feature.properties;
const [lon, lat] = feature.geometry.coordinates;
const osmCategory = props.addendum?.osm?.category;
const osmType = props.addendum?.osm?.type;
return {
label: props.label || props.name || '',
name: props.name || '',
@ -203,9 +193,8 @@ function normalizePeliasFeature(feature: PeliasFeature): GeocodingResult {
state: props.region,
country: props.country,
},
category: mapOsmToPlaceCategory(osmCategory, osmType, props.layer),
osmCategory,
osmType,
category: mapPeliasToPlaceCategory(props.category, props.layer),
peliasCategories: props.category,
confidence: props.confidence ?? 0,
};
}