From e82b5c1449ede1acf1aa0a7dd2dc8a92a51b7e50 Mon Sep 17 00:00:00 2001 From: Till JS Date: Sat, 11 Apr 2026 15:48:24 +0200 Subject: [PATCH] feat(geocoding): auto-categorize places via Pelias taxonomy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pelias hides the 'category' field from API responses unless the caller filters by categories=... explicitly — a default intended for keyword search that strips category metadata from address queries. Patch the Pelias API's geojsonify_place_details.js so the category array is returned on every feature (food, retail, transport, …), mounted into the container as a read-only volume override. Rewrite category-map.ts to map Pelias' OSM taxonomy to our 7 PlaceCategories using a priority-ordered list so a restaurant tagged ['food','retail','nightlife'] resolves to 'food' (the most specific), not 'shopping'. Verified with Konstanz test queries: Konzil Restaurant → food Bahnhof Konstanz → transit Physiotherapie-Schule → work MX-Park → leisure Co-Authored-By: Claude Opus 4.6 (1M context) --- .../web/src/lib/modules/places/geocoding.ts | 4 +- .../mana-geocoding/pelias/docker-compose.yml | 8 + .../pelias/geojsonify_place_details.js | 123 +++++++++++ .../mana-geocoding/src/lib/category-map.ts | 191 +++++------------- services/mana-geocoding/src/routes/geocode.ts | 25 +-- 5 files changed, 195 insertions(+), 156 deletions(-) create mode 100644 services/mana-geocoding/pelias/geojsonify_place_details.js diff --git a/apps/mana/apps/web/src/lib/modules/places/geocoding.ts b/apps/mana/apps/web/src/lib/modules/places/geocoding.ts index 3b748719c..480187665 100644 --- a/apps/mana/apps/web/src/lib/modules/places/geocoding.ts +++ b/apps/mana/apps/web/src/lib/modules/places/geocoding.ts @@ -31,8 +31,8 @@ export interface GeocodingResult { country?: string; }; category: PlaceCategory; - osmCategory?: string; - osmType?: string; + /** Raw Pelias categories (food, retail, transport, …) */ + peliasCategories?: string[]; confidence: number; } diff --git a/services/mana-geocoding/pelias/docker-compose.yml b/services/mana-geocoding/pelias/docker-compose.yml index 63df0554d..dd009b6c8 100644 --- a/services/mana-geocoding/pelias/docker-compose.yml +++ b/services/mana-geocoding/pelias/docker-compose.yml @@ -21,6 +21,14 @@ services: PORT: 4000 volumes: - ./pelias.json:/code/pelias.json:ro + # Patch: always return the `category` field in API responses, not only + # when a `categories=...` filter is present. Pelias' default + # `checkCategoryParam` hides category from results unless the caller + # filters by it, but we want the OSM taxonomy (food, retail, transport, …) + # on every venue so our Places UI can auto-map it to a PlaceCategory. + # The patched file is generated from the upstream one with + # `sed "s|condition: checkCategoryParam|condition: () => true|"` + - ./geojsonify_place_details.js:/code/pelias/api/helper/geojsonify_place_details.js:ro depends_on: elasticsearch: condition: service_healthy diff --git a/services/mana-geocoding/pelias/geojsonify_place_details.js b/services/mana-geocoding/pelias/geojsonify_place_details.js new file mode 100644 index 000000000..6d209ddad --- /dev/null +++ b/services/mana-geocoding/pelias/geojsonify_place_details.js @@ -0,0 +1,123 @@ +const _ = require('lodash'); +const field = require('./fieldValue'); + +// Properties to be copied +// If a property is identified as a single string, assume it should be presented as a string in response +// If something other than string is desired, use the following structure: { name: 'category', type: 'array' } +const DETAILS_PROPS = [ + { name: 'unit', type: 'string' }, + { name: 'housenumber', type: 'string' }, + { name: 'street', type: 'string' }, + { name: 'postalcode', type: 'string' }, + { name: 'postalcode_gid', type: 'string' }, + { name: 'confidence', type: 'default' }, + { name: 'match_type', type: 'string' }, + { name: 'distance', type: 'default' }, + { name: 'accuracy', type: 'string' }, + { name: 'country', type: 'string' }, + { name: 'country_gid', type: 'string' }, + { name: 'country_a', type: 'string' }, + { name: 'dependency', type: 'string' }, + { name: 'dependency_gid', type: 'string' }, + { name: 'dependency_a', type: 'string' }, + { name: 'macroregion', type: 'string' }, + { name: 'macroregion_gid', type: 'string' }, + { name: 'macroregion_a', type: 'string' }, + { name: 'region', type: 'string' }, + { name: 'region_gid', type: 'string' }, + { name: 'region_a', type: 'string' }, + { name: 'macrocounty', type: 'string' }, + { name: 'macrocounty_gid', type: 'string' }, + { name: 'macrocounty_a', type: 'string' }, + { name: 'county', type: 'string' }, + { name: 'county_gid', type: 'string' }, + { name: 'county_a', type: 'string' }, + { name: 'localadmin', type: 'string' }, + { name: 'localadmin_gid', type: 'string' }, + { name: 'localadmin_a', type: 'string' }, + { name: 'locality', type: 'string' }, + { name: 'locality_gid', type: 'string' }, + { name: 'locality_a', type: 'string' }, + { name: 'borough', type: 'string' }, + { name: 'borough_gid', type: 'string' }, + { name: 'borough_a', type: 'string' }, + { name: 'neighbourhood', type: 'string' }, + { name: 'neighbourhood_gid', type: 'string' }, + { name: 'continent', type: 'string' }, + { name: 'continent_gid', type: 'string' }, + { name: 'continent_a', type: 'string' }, + { name: 'empire', type: 'string', condition: _.negate(hasCountry) }, + { name: 'empire_gid', type: 'string', condition: _.negate(hasCountry) }, + { name: 'empire_a', type: 'string', condition: _.negate(hasCountry) }, + { name: 'ocean', type: 'string' }, + { name: 'ocean_gid', type: 'string' }, + { name: 'ocean_a', type: 'string' }, + { name: 'marinearea', type: 'string' }, + { name: 'marinearea_gid', type: 'string' }, + { name: 'marinearea_a', type: 'string' }, + { name: 'bounding_box', type: 'default' }, + { name: 'label', type: 'string' }, + { name: 'category', type: 'array', condition: () => true }, +]; + +const EXTENDED_PROPS = DETAILS_PROPS.concat([ + { name: 'population', type: 'default' }, + { name: 'popularity', type: 'default' }, +]); + +// returns true IFF source a country_gid property +function hasCountry(params, source) { + return source.hasOwnProperty('country_gid'); +} + +function checkCategoryParam(params) { + return _.isObject(params) && params.hasOwnProperty('categories'); +} + +/** + * Collect the specified properties from source into an object and return it + * Ignore missing properties. + * + * @param {object} params clean query params + * @param {object} source + * @param {object} dst + */ +function collectProperties(params, source) { + let props = DETAILS_PROPS; + + // extended properties when debugging mode is enabled + if (params.enableDebug === true) { + props = EXTENDED_PROPS; + } + + return props.reduce((result, prop) => { + // if condition isn't met, don't set the property + if (_.isFunction(prop.condition) && !prop.condition(params, source)) { + return result; + } + + if (source.hasOwnProperty(prop.name)) { + let value = null; + + switch (prop.type) { + case 'string': + value = field.getStringValue(source[prop.name]); + break; + case 'array': + value = field.getArrayValue(source[prop.name]); + break; + // default behavior is to copy property exactly as is + default: + value = source[prop.name]; + } + + if (_.isNumber(value) || (value && !_.isEmpty(value))) { + result[prop.name] = value; + } + } + + return result; + }, {}); +} + +module.exports = collectProperties; diff --git a/services/mana-geocoding/src/lib/category-map.ts b/services/mana-geocoding/src/lib/category-map.ts index 156d81aaa..9aa1a4317 100644 --- a/services/mana-geocoding/src/lib/category-map.ts +++ b/services/mana-geocoding/src/lib/category-map.ts @@ -1,156 +1,75 @@ /** - * Maps Pelias/OSM categories to our 7 Places categories. + * Maps Pelias categories (OSM taxonomy) to our 7 Places categories. * - * Pelias returns results with `addendum.osm.category` and `addendum.osm.type` - * fields that correspond to OSM key/value pairs. We map these to our simple - * category enum: home, work, food, shopping, transit, leisure, other. + * Pelias' openstreetmap importer tags venues with categories from its + * built-in taxonomy (food, retail, transport, health, education, …). + * We collapse those into the simpler Places enum: + * + * home · work · food · shopping · transit · leisure · other + * + * A venue can have multiple Pelias categories (e.g. a restaurant is + * tagged `['food', 'retail', 'nightlife']`). We pick the most specific + * one in priority order rather than the first — a restaurant should be + * "food" even though "retail" also matches. */ export type PlaceCategory = 'home' | 'work' | 'food' | 'shopping' | 'transit' | 'leisure' | 'other'; /** - * OSM key → PlaceCategory mapping. - * The key is the OSM tag key (e.g. "amenity", "shop"), the value maps - * specific OSM values to our categories. A `_default` entry covers - * any value not explicitly listed. + * Priority-ordered: first matching category wins. Earlier entries are + * more specific, so "food" beats "retail", "transport" beats "professional". */ -const OSM_CATEGORY_MAP: Record< - string, - Record & { _default?: PlaceCategory } -> = { - amenity: { - _default: 'other', - restaurant: 'food', - cafe: 'food', - fast_food: 'food', - bar: 'food', - pub: 'food', - biergarten: 'food', - food_court: 'food', - ice_cream: 'food', - bakery: 'food', - school: 'work', - university: 'work', - college: 'work', - library: 'work', - coworking_space: 'work', - office: 'work', - bus_station: 'transit', - ferry_terminal: 'transit', - taxi: 'transit', - parking: 'transit', - fuel: 'transit', - bicycle_parking: 'transit', - charging_station: 'transit', - cinema: 'leisure', - theatre: 'leisure', - nightclub: 'leisure', - community_centre: 'leisure', - swimming_pool: 'leisure', - marketplace: 'shopping', - }, - shop: { - _default: 'shopping', - supermarket: 'shopping', - bakery: 'food', - butcher: 'food', - deli: 'food', - greengrocer: 'food', - seafood: 'food', - pastry: 'food', - cheese: 'food', - coffee: 'food', - }, - tourism: { - _default: 'leisure', - hotel: 'other', - hostel: 'other', - guest_house: 'other', - motel: 'other', - apartment: 'home', - }, - leisure: { - _default: 'leisure', - park: 'leisure', - playground: 'leisure', - sports_centre: 'leisure', - fitness_centre: 'leisure', - stadium: 'leisure', - swimming_pool: 'leisure', - garden: 'leisure', - nature_reserve: 'leisure', - beach_resort: 'leisure', - marina: 'leisure', - }, - railway: { - _default: 'transit', - station: 'transit', - halt: 'transit', - tram_stop: 'transit', - }, - aeroway: { - _default: 'transit', - aerodrome: 'transit', - terminal: 'transit', - }, - highway: { - _default: 'transit', - bus_stop: 'transit', - }, - building: { - _default: 'other', - residential: 'home', - house: 'home', - apartments: 'home', - detached: 'home', - commercial: 'work', - office: 'work', - industrial: 'work', - retail: 'shopping', - supermarket: 'shopping', - church: 'leisure', - cathedral: 'leisure', - stadium: 'leisure', - school: 'work', - university: 'work', - hospital: 'other', - }, - office: { - _default: 'work', - }, - sport: { - _default: 'leisure', - }, -}; +const PELIAS_PRIORITY: Array<[string, PlaceCategory]> = [ + // Food is strongest signal — a restaurant is food, not retail + ['food', 'food'], + + // Transit/transport + ['transport:public', 'transit'], + ['transport:air', 'transit'], + ['transport:sea', 'transit'], + ['transport:bus', 'transit'], + ['transport:taxi', 'transit'], + ['transport', 'transit'], + + // Shopping — explicit retail markers + ['retail', 'shopping'], + + // Leisure / entertainment / recreation + ['entertainment', 'leisure'], + ['nightlife', 'leisure'], + ['recreation', 'leisure'], + + // Work-ish + ['education', 'work'], + ['professional', 'work'], + ['government', 'work'], + ['finance', 'work'], + + // Health/religion fall through to other + ['health', 'other'], + ['religion', 'other'], +]; /** - * Derive a PlaceCategory from a Pelias result's OSM metadata. + * Derive a PlaceCategory from a Pelias feature's category array. * - * Pelias provides category info in several fields depending on the data source. - * We check them in order of specificity. + * @param categories The `category` array from a Pelias feature's properties + * @param peliasLayer The Pelias layer (venue, address, street, …) — used as fallback hint */ -export function mapOsmToPlaceCategory( - osmCategory?: string, - osmType?: string, +export function mapPeliasToPlaceCategory( + categories?: string[] | null, peliasLayer?: string ): PlaceCategory { - // Try direct OSM key/value mapping first - if (osmCategory && osmType) { - const categoryMap = OSM_CATEGORY_MAP[osmCategory]; - if (categoryMap) { - return categoryMap[osmType] ?? categoryMap._default ?? 'other'; + if (Array.isArray(categories) && categories.length > 0) { + // Walk our priority list and pick the first match + for (const [peliasCat, placeCat] of PELIAS_PRIORITY) { + if (categories.includes(peliasCat)) return placeCat; } } - // Try just the OSM key as a category - if (osmCategory) { - const categoryMap = OSM_CATEGORY_MAP[osmCategory]; - if (categoryMap?._default) { - return categoryMap._default; - } - } - - // Fallback: use Pelias layer as a hint + // Fallback: use Pelias layer as a hint. Addresses/streets/regions + // all land in "other" since they aren't really "places" in the + // categorical sense. if (peliasLayer) { switch (peliasLayer) { case 'venue': diff --git a/services/mana-geocoding/src/routes/geocode.ts b/services/mana-geocoding/src/routes/geocode.ts index 6a5ff4ca2..bb881e96a 100644 --- a/services/mana-geocoding/src/routes/geocode.ts +++ b/services/mana-geocoding/src/routes/geocode.ts @@ -10,7 +10,7 @@ import { Hono } from 'hono'; import type { Config } from '../config'; import { LRUCache } from '../lib/cache'; -import { mapOsmToPlaceCategory, type PlaceCategory } from '../lib/category-map'; +import { mapPeliasToPlaceCategory, type PlaceCategory } from '../lib/category-map'; /** Normalized result returned to the client */ export interface GeocodingResult { @@ -29,12 +29,10 @@ export interface GeocodingResult { state?: string; country?: string; }; - /** Our Places category, derived from OSM tags */ + /** Our Places category, derived from Pelias taxonomy */ category: PlaceCategory; - /** Raw OSM category key (e.g. "amenity") */ - osmCategory?: string; - /** Raw OSM type value (e.g. "restaurant") */ - osmType?: string; + /** Raw Pelias categories (food, retail, transport, …) */ + peliasCategories?: string[]; /** Pelias confidence score 0-1 */ confidence: number; } @@ -174,12 +172,7 @@ interface PeliasFeature { locality?: string; region?: string; country?: string; - addendum?: { - osm?: { - category?: string; - type?: string; - }; - }; + category?: string[]; }; } @@ -187,9 +180,6 @@ function normalizePeliasFeature(feature: PeliasFeature): GeocodingResult { const props = feature.properties; const [lon, lat] = feature.geometry.coordinates; - const osmCategory = props.addendum?.osm?.category; - const osmType = props.addendum?.osm?.type; - return { label: props.label || props.name || '', name: props.name || '', @@ -203,9 +193,8 @@ function normalizePeliasFeature(feature: PeliasFeature): GeocodingResult { state: props.region, country: props.country, }, - category: mapOsmToPlaceCategory(osmCategory, osmType, props.layer), - osmCategory, - osmType, + category: mapPeliasToPlaceCategory(props.category, props.layer), + peliasCategories: props.category, confidence: props.confidence ?? 0, }; }