From bcc21ca7853963b37ae24cb7b3b91373c3ec7803 Mon Sep 17 00:00:00 2001 From: Till JS Date: Tue, 28 Apr 2026 16:04:56 +0200 Subject: [PATCH] =?UTF-8?q?feat(geocoding):=20privacy=20hardening=20?= =?UTF-8?q?=E2=80=94=20sensitive-query=20block=20+=20coord=20quantization?= =?UTF-8?q?=20+=20extended=20cache=20TTL=20for=20public=20answers?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three independent defenses limit what public geocoding APIs (Photon, Nominatim) can learn from our outbound traffic: 1. **Sensitive-query block** (`lib/sensitive-query.ts`) Queries matching the medical/mental-health/crisis-service keyword list (Hausarzt, Psychiater, Klinikum, HIV, Frauenhaus, …) are never forwarded to public APIs. The chain detects sensitivity at the route layer and runs the search in localOnly mode — providers with `privacy: 'public'` are filtered out before iteration begins. When no local provider is available (Pelias stopped), a sensitive query returns ok:true with results:[] and notice: 'sensitive_local_unavailable' so the UI can show a sensible message instead of "no results". The keyword list is documented inline. False negatives are the risk; false positives just produce a 0-result UX hit (better trade-off). 2. **Coordinate quantization** (`lib/privacy.ts`) Forward-search focus.lat/lon: rounded to 2 decimals (~1.1km). Enough for the bias to work, hides exact GPS. Reverse-geocoding lat/lon: rounded to 3 decimals (~110m). City-block resolution — sufficient for "what's near me?", avoids reverse-geocoding the user's exact front door. Pelias always gets full precision; quantization only on the way out to public APIs. New `privacy: 'local' | 'public'` field on the GeocodingProvider interface drives this. 3. **Extended cache TTL for public answers** New `cache.publicTtlMs` config option, default 7 days (vs. 24h for local-provider answers). LRU cache extended with optional `ttlOverrideMs` per entry. 
Same query from N users → 1 outbound request to Photon/Nominatim. Strongest privacy lever we have over public providers (we can't change their logging, only the rate at which we feed them queries). Threat coverage: ✓ User IP / identity hidden (already true — wrapper is the proxy) ✓ Exact GPS hidden (quantization) ✓ Sensitive query content protected (block) ~ Non-sensitive query content visible (acceptable trade-off) ~ Aggregate profiling reduced ~10–100× (cache) ✗ TLS-level traffic analysis, compelled disclosure (out of scope) Tests: 141 (was 115). New coverage: - privacy.test.ts: quantization rules (locks the privacy claim) - sensitive-query.test.ts: positive matches across categories + documented false positives we accept - chain.test.ts: localOnly mode end-to-end including the load- bearing assertion that public providers' search() must NEVER be called when the chain is in localOnly mode (no race window) - cache.test.ts: per-entry ttlOverride longer + shorter than default Live smoke verified end-to-end: - "Hausarzt Konstanz" with Pelias down → no public API call, notice: 'sensitive_local_unavailable' - "Konstanz" → falls through to Photon, notice: 'fallback_used' - Reverse with high-precision GPS → Photon receives quantized coords, returns city-block-level result --- .../{community => feedback}/ListView.svelte | 0 .../components/ItemCard.svelte | 0 .../{community => feedback}/module.config.ts | 0 .../{community => feedback}/queries.svelte.ts | 0 .../views/DetailView.svelte | 0 .../views/ListView.svelte | 0 .../views/MyWishesView.svelte | 0 .../views/RoadmapView.svelte | 0 .../{community => feedback}/+layout.svelte | 0 .../{community => feedback}/+page.server.ts | 0 .../{community => feedback}/+page.svelte | 0 .../[id]/+page.server.ts | 0 .../{community => feedback}/[id]/+page.svelte | 0 .../admin/+page.svelte | 0 .../eule/[hash]/+page.server.ts | 0 .../eule/[hash]/+page.svelte | 0 .../roadmap/+page.svelte | 0 services/mana-geocoding/CLAUDE.md | 86 
+++++++++++++-- services/mana-geocoding/src/config.ts | 9 +- .../src/lib/__tests__/cache.test.ts | 28 +++++ .../src/lib/__tests__/privacy.test.ts | 52 +++++++++ .../src/lib/__tests__/sensitive-query.test.ts | 103 ++++++++++++++++++ services/mana-geocoding/src/lib/cache.ts | 14 ++- services/mana-geocoding/src/lib/privacy.ts | 45 ++++++++ .../mana-geocoding/src/lib/sensitive-query.ts | 79 ++++++++++++++ .../src/providers/__tests__/chain.test.ts | 98 ++++++++++++++++- .../mana-geocoding/src/providers/chain.ts | 64 ++++++++++- .../mana-geocoding/src/providers/nominatim.ts | 16 ++- .../mana-geocoding/src/providers/pelias.ts | 1 + .../mana-geocoding/src/providers/photon.ts | 25 ++++- .../mana-geocoding/src/providers/types.ts | 9 ++ services/mana-geocoding/src/routes/geocode.ts | 58 +++++++++- 32 files changed, 658 insertions(+), 29 deletions(-) rename apps/mana/apps/web/src/lib/modules/{community => feedback}/ListView.svelte (100%) rename apps/mana/apps/web/src/lib/modules/{community => feedback}/components/ItemCard.svelte (100%) rename apps/mana/apps/web/src/lib/modules/{community => feedback}/module.config.ts (100%) rename apps/mana/apps/web/src/lib/modules/{community => feedback}/queries.svelte.ts (100%) rename apps/mana/apps/web/src/lib/modules/{community => feedback}/views/DetailView.svelte (100%) rename apps/mana/apps/web/src/lib/modules/{community => feedback}/views/ListView.svelte (100%) rename apps/mana/apps/web/src/lib/modules/{community => feedback}/views/MyWishesView.svelte (100%) rename apps/mana/apps/web/src/lib/modules/{community => feedback}/views/RoadmapView.svelte (100%) rename apps/mana/apps/web/src/routes/{community => feedback}/+layout.svelte (100%) rename apps/mana/apps/web/src/routes/{community => feedback}/+page.server.ts (100%) rename apps/mana/apps/web/src/routes/{community => feedback}/+page.svelte (100%) rename apps/mana/apps/web/src/routes/{community => feedback}/[id]/+page.server.ts (100%) rename apps/mana/apps/web/src/routes/{community => 
feedback}/[id]/+page.svelte (100%) rename apps/mana/apps/web/src/routes/{community => feedback}/admin/+page.svelte (100%) rename apps/mana/apps/web/src/routes/{community => feedback}/eule/[hash]/+page.server.ts (100%) rename apps/mana/apps/web/src/routes/{community => feedback}/eule/[hash]/+page.svelte (100%) rename apps/mana/apps/web/src/routes/{community => feedback}/roadmap/+page.svelte (100%) create mode 100644 services/mana-geocoding/src/lib/__tests__/privacy.test.ts create mode 100644 services/mana-geocoding/src/lib/__tests__/sensitive-query.test.ts create mode 100644 services/mana-geocoding/src/lib/privacy.ts create mode 100644 services/mana-geocoding/src/lib/sensitive-query.ts diff --git a/apps/mana/apps/web/src/lib/modules/community/ListView.svelte b/apps/mana/apps/web/src/lib/modules/feedback/ListView.svelte similarity index 100% rename from apps/mana/apps/web/src/lib/modules/community/ListView.svelte rename to apps/mana/apps/web/src/lib/modules/feedback/ListView.svelte diff --git a/apps/mana/apps/web/src/lib/modules/community/components/ItemCard.svelte b/apps/mana/apps/web/src/lib/modules/feedback/components/ItemCard.svelte similarity index 100% rename from apps/mana/apps/web/src/lib/modules/community/components/ItemCard.svelte rename to apps/mana/apps/web/src/lib/modules/feedback/components/ItemCard.svelte diff --git a/apps/mana/apps/web/src/lib/modules/community/module.config.ts b/apps/mana/apps/web/src/lib/modules/feedback/module.config.ts similarity index 100% rename from apps/mana/apps/web/src/lib/modules/community/module.config.ts rename to apps/mana/apps/web/src/lib/modules/feedback/module.config.ts diff --git a/apps/mana/apps/web/src/lib/modules/community/queries.svelte.ts b/apps/mana/apps/web/src/lib/modules/feedback/queries.svelte.ts similarity index 100% rename from apps/mana/apps/web/src/lib/modules/community/queries.svelte.ts rename to apps/mana/apps/web/src/lib/modules/feedback/queries.svelte.ts diff --git 
a/apps/mana/apps/web/src/lib/modules/community/views/DetailView.svelte b/apps/mana/apps/web/src/lib/modules/feedback/views/DetailView.svelte similarity index 100% rename from apps/mana/apps/web/src/lib/modules/community/views/DetailView.svelte rename to apps/mana/apps/web/src/lib/modules/feedback/views/DetailView.svelte diff --git a/apps/mana/apps/web/src/lib/modules/community/views/ListView.svelte b/apps/mana/apps/web/src/lib/modules/feedback/views/ListView.svelte similarity index 100% rename from apps/mana/apps/web/src/lib/modules/community/views/ListView.svelte rename to apps/mana/apps/web/src/lib/modules/feedback/views/ListView.svelte diff --git a/apps/mana/apps/web/src/lib/modules/community/views/MyWishesView.svelte b/apps/mana/apps/web/src/lib/modules/feedback/views/MyWishesView.svelte similarity index 100% rename from apps/mana/apps/web/src/lib/modules/community/views/MyWishesView.svelte rename to apps/mana/apps/web/src/lib/modules/feedback/views/MyWishesView.svelte diff --git a/apps/mana/apps/web/src/lib/modules/community/views/RoadmapView.svelte b/apps/mana/apps/web/src/lib/modules/feedback/views/RoadmapView.svelte similarity index 100% rename from apps/mana/apps/web/src/lib/modules/community/views/RoadmapView.svelte rename to apps/mana/apps/web/src/lib/modules/feedback/views/RoadmapView.svelte diff --git a/apps/mana/apps/web/src/routes/community/+layout.svelte b/apps/mana/apps/web/src/routes/feedback/+layout.svelte similarity index 100% rename from apps/mana/apps/web/src/routes/community/+layout.svelte rename to apps/mana/apps/web/src/routes/feedback/+layout.svelte diff --git a/apps/mana/apps/web/src/routes/community/+page.server.ts b/apps/mana/apps/web/src/routes/feedback/+page.server.ts similarity index 100% rename from apps/mana/apps/web/src/routes/community/+page.server.ts rename to apps/mana/apps/web/src/routes/feedback/+page.server.ts diff --git a/apps/mana/apps/web/src/routes/community/+page.svelte 
b/apps/mana/apps/web/src/routes/feedback/+page.svelte similarity index 100% rename from apps/mana/apps/web/src/routes/community/+page.svelte rename to apps/mana/apps/web/src/routes/feedback/+page.svelte diff --git a/apps/mana/apps/web/src/routes/community/[id]/+page.server.ts b/apps/mana/apps/web/src/routes/feedback/[id]/+page.server.ts similarity index 100% rename from apps/mana/apps/web/src/routes/community/[id]/+page.server.ts rename to apps/mana/apps/web/src/routes/feedback/[id]/+page.server.ts diff --git a/apps/mana/apps/web/src/routes/community/[id]/+page.svelte b/apps/mana/apps/web/src/routes/feedback/[id]/+page.svelte similarity index 100% rename from apps/mana/apps/web/src/routes/community/[id]/+page.svelte rename to apps/mana/apps/web/src/routes/feedback/[id]/+page.svelte diff --git a/apps/mana/apps/web/src/routes/community/admin/+page.svelte b/apps/mana/apps/web/src/routes/feedback/admin/+page.svelte similarity index 100% rename from apps/mana/apps/web/src/routes/community/admin/+page.svelte rename to apps/mana/apps/web/src/routes/feedback/admin/+page.svelte diff --git a/apps/mana/apps/web/src/routes/community/eule/[hash]/+page.server.ts b/apps/mana/apps/web/src/routes/feedback/eule/[hash]/+page.server.ts similarity index 100% rename from apps/mana/apps/web/src/routes/community/eule/[hash]/+page.server.ts rename to apps/mana/apps/web/src/routes/feedback/eule/[hash]/+page.server.ts diff --git a/apps/mana/apps/web/src/routes/community/eule/[hash]/+page.svelte b/apps/mana/apps/web/src/routes/feedback/eule/[hash]/+page.svelte similarity index 100% rename from apps/mana/apps/web/src/routes/community/eule/[hash]/+page.svelte rename to apps/mana/apps/web/src/routes/feedback/eule/[hash]/+page.svelte diff --git a/apps/mana/apps/web/src/routes/community/roadmap/+page.svelte b/apps/mana/apps/web/src/routes/feedback/roadmap/+page.svelte similarity index 100% rename from apps/mana/apps/web/src/routes/community/roadmap/+page.svelte rename to 
apps/mana/apps/web/src/routes/feedback/roadmap/+page.svelte diff --git a/services/mana-geocoding/CLAUDE.md b/services/mana-geocoding/CLAUDE.md index c3831fd96..69886f02d 100644 --- a/services/mana-geocoding/CLAUDE.md +++ b/services/mana-geocoding/CLAUDE.md @@ -171,7 +171,8 @@ NOMINATIM_INTERVAL_MS=1100 # >= 1000 to honor 1 req/sec policy # --- Misc ------------------------------------------------------------- CORS_ORIGINS=http://localhost:5173,https://mana.how CACHE_MAX_ENTRIES=5000 -CACHE_TTL_MS=86400000 +CACHE_TTL_MS=86400000 # 24h — used for local-provider answers +CACHE_PUBLIC_TTL_MS=604800000 # 7d — extended TTL for public-API answers (privacy) ``` To **disable a provider**, drop it from `GEOCODING_PROVIDERS`. To run with @@ -202,9 +203,78 @@ Client (Places module) 3. Nominatim ← last resort: 200–800 ms + 1 req/sec queue ``` -The response body includes `provider: 'pelias' | 'photon' | 'nominatim'` -and `tried: ProviderName[]` so the caller can render a "approximate match" -hint when a fallback served the request. +The response body includes `provider: 'pelias' | 'photon' | 'nominatim'`, +`tried: ProviderName[]`, and an optional `notice` (`'fallback_used'` or +`'sensitive_local_unavailable'`) so the caller can render an +"approximate match" hint or explain why a sensitive query returned 0 +results. + +## Privacy hardening + +When a request goes to Pelias, the user's query content + focus point +stay on our infrastructure. When it falls through to Photon or +Nominatim, the query is forwarded to a third party. Three independent +defenses limit what those third parties can learn: + +### 1. Sensitive-query block (`src/lib/sensitive-query.ts`) + +Queries matching the medical / mental-health / crisis-service keyword +list (`Hausarzt`, `Psychiater`, `Klinikum`, `Suchtberatung`, `HIV`, +`Frauenhaus`, …) are **never forwarded to public APIs**, even if Pelias +is unreachable. 
The chain detects sensitivity at the route layer and +calls `chain.search(req, signal, { localOnly: true })` — providers with +`privacy: 'public'` are filtered out *before* the iteration begins, so +there is no race window. + +When no local provider is available (e.g. Pelias is stopped), a +sensitive query returns `ok: true, results: [], notice: +'sensitive_local_unavailable'`. The UI should show "Diese Suche bleibt +bewusst lokal — kein Treffer im DACH-Index. Versuche eine allgemeinere +Formulierung." rather than "no results". + +The keyword list is documented and maintained inline. False negatives +(a sensitive query slipping through) are the primary risk; false +positives just produce a 0-result UX hit, which is the safer +trade-off. + +### 2. Coordinate quantization (`src/lib/privacy.ts`) + +Coordinates are rounded before forwarding to public providers: + +- **Forward-search focus** (`focus.lat/lon`): rounded to 2 decimals + (~1.1 km). Enough for the "results near me" bias without sending + exact GPS. +- **Reverse-geocoding lat/lon**: rounded to 3 decimals (~110 m). + City-block resolution — sufficient for "what's near me?", avoids + logging exact home/workplace coordinates to a third party. + +Pelias always gets full-precision coordinates — quantization only +applies on the way out to public APIs. + +### 3. Aggressive caching of public-API answers + +`config.cache.publicTtlMs` (default 7 days) overrides the default 24h +cache TTL when the response came from a public provider. Same query +from 1000 different users → 1 outbound request to Photon/Nominatim. +This is the strongest privacy lever we have over public providers, +since we can't change their logging behavior — only the rate at which +we feed them queries. + +### What this protects + what it doesn't + +| Threat | Protected? 
| +|---|---| +| Public API sees user's IP | ✓ (wrapper is the proxy, only mac-mini IP goes out) | +| Public API sees user identity / JWT | ✓ (wrapper sends no auth headers) | +| Public API sees query content | partial — sensitive queries blocked entirely, others go through | +| Public API sees user's exact GPS | ✓ (quantized to ~1km / ~110m) | +| Aggregate location-intent profiling | partial — cache reduces volume ~10–100× | +| TLS-level traffic analysis (timing) | ✗ (not in scope) | +| Compelled disclosure of public-API logs | ✗ (no legal mitigation) | + +Residual risk for non-sensitive queries: "third party learns what +queries our backend made, with timestamps, but not who made them." +Acceptable for restaurant/landmark lookups, blocked for medical lookups. ## Pelias Infrastructure @@ -319,7 +389,7 @@ bun test `nominatim-normalizer.test.ts` — locking the wire-format mapping for the two public fallback providers. -As of the 2026-04-28 fallback rollout: **115 tests, all green**. +As of the 2026-04-28 privacy-hardening rollout: **141 tests, all green**. 
### Smoke test (`bun run test:smoke`) @@ -354,10 +424,12 @@ src/ │ ├── photon.ts # Fallback 1: photon.komoot.io │ └── nominatim.ts # Fallback 2: nominatim.openstreetmap.org └── lib/ - ├── cache.ts # LRU cache with TTL (provider-agnostic) + ├── cache.ts # LRU cache with TTL + per-entry override ├── category-map.ts # Pelias-taxonomy → PlaceCategory ├── osm-category-map.ts # Raw OSM `class:type` → PlaceCategory - └── rate-limiter.ts # Single-token limiter (used by Nominatim) + ├── privacy.ts # Coordinate quantization for public APIs + ├── rate-limiter.ts # Single-token limiter (used by Nominatim) + └── sensitive-query.ts # Health/crisis keyword detector pelias/ ├── docker-compose.yml # Pelias stack ├── pelias.json # Pelias config (DACH region) diff --git a/services/mana-geocoding/src/config.ts b/services/mana-geocoding/src/config.ts index 96f2c8888..2d857c04b 100644 --- a/services/mana-geocoding/src/config.ts +++ b/services/mana-geocoding/src/config.ts @@ -27,8 +27,14 @@ export interface Config { cache: { /** Max entries in the in-memory LRU cache */ maxEntries: number; - /** TTL in milliseconds (default: 24h — geocoding results rarely change) */ + /** Default TTL in milliseconds (24h — used for results from local + * providers like Pelias, where the index can be re-imported) */ ttlMs: number; + /** Extended TTL for results that came from public APIs (Photon, + * Nominatim). 7 days by default — caching aggressively reduces + * the number of times we forward query content to a third party, + * which is the main privacy lever we have over public providers. */ + publicTtlMs: number; }; providers: { /** Order matters — the chain tries them top-down. 
Anything not in @@ -64,6 +70,7 @@ export function loadConfig(): Config { cache: { maxEntries: parseInt(process.env.CACHE_MAX_ENTRIES || '5000', 10), ttlMs: parseInt(process.env.CACHE_TTL_MS || String(24 * 60 * 60 * 1000), 10), + publicTtlMs: parseInt(process.env.CACHE_PUBLIC_TTL_MS || String(7 * 24 * 60 * 60 * 1000), 10), }, providers: { enabled: parseProviderList(process.env.GEOCODING_PROVIDERS, [ diff --git a/services/mana-geocoding/src/lib/__tests__/cache.test.ts b/services/mana-geocoding/src/lib/__tests__/cache.test.ts index 6196f2c50..1ac484dbe 100644 --- a/services/mana-geocoding/src/lib/__tests__/cache.test.ts +++ b/services/mana-geocoding/src/lib/__tests__/cache.test.ts @@ -79,6 +79,34 @@ describe('LRUCache', () => { expect(cache.size).toBe(0); }); + it('honors per-entry ttlOverride longer than the default', async () => { + // Default TTL = 20ms; one entry overridden to 200ms. + const cache = new LRUCache(10, 20); + cache.set('short', 'short-lived'); + cache.set('long', 'long-lived', 200); + + // Both alive at t=0 + expect(cache.get('short')).toBe('short-lived'); + expect(cache.get('long')).toBe('long-lived'); + + // Past default TTL: short expires, long survives + await new Promise((r) => setTimeout(r, 40)); + expect(cache.get('short')).toBeUndefined(); + expect(cache.get('long')).toBe('long-lived'); + }); + + it('honors per-entry ttlOverride shorter than the default', async () => { + // Inverse case: shorter override on one entry. Validates we use + // the override value, not the larger of (default, override). 
+ const cache = new LRUCache(10, 200); + cache.set('default', 'A'); + cache.set('shortened', 'B', 20); + + await new Promise((r) => setTimeout(r, 40)); + expect(cache.get('default')).toBe('A'); + expect(cache.get('shortened')).toBeUndefined(); + }); + it('handles arbitrary value types', () => { interface Feature { name: string; diff --git a/services/mana-geocoding/src/lib/__tests__/privacy.test.ts b/services/mana-geocoding/src/lib/__tests__/privacy.test.ts new file mode 100644 index 000000000..4589efda0 --- /dev/null +++ b/services/mana-geocoding/src/lib/__tests__/privacy.test.ts @@ -0,0 +1,52 @@ +/** + * Tests for the coordinate-quantization helper used to round outbound + * lat/lon before forwarding to public APIs. The privacy claim depends + * on these rounding rules — lock them. + */ + +import { describe, expect, it } from 'bun:test'; +import { PUBLIC_FOCUS_DECIMALS, PUBLIC_REVERSE_DECIMALS, quantizeCoord } from '../privacy'; + +describe('quantizeCoord', () => { + it('rounds a number string to N decimals', () => { + expect(quantizeCoord('47.66341234', 2)).toBe('47.66'); + expect(quantizeCoord('47.66341234', 3)).toBe('47.663'); + expect(quantizeCoord('47.66351234', 3)).toBe('47.664'); // rounds, not truncates + }); + + it('handles a number input the same as a string', () => { + expect(quantizeCoord(47.66341234, 2)).toBe('47.66'); + }); + + it('returns undefined for undefined / null / empty / NaN', () => { + expect(quantizeCoord(undefined, 2)).toBeUndefined(); + expect(quantizeCoord('', 2)).toBeUndefined(); + expect(quantizeCoord('not-a-number', 2)).toBeUndefined(); + // Number.NaN is the easy footgun — toFixed would return 'NaN' string + expect(quantizeCoord(NaN, 2)).toBeUndefined(); + }); + + it('preserves precision via string return (not lossy float)', () => { + // 0.1 + 0.2 = 0.30000000000000004 — toFixed must guard against + // representational drift sneaking back in. The string form is + // what we drop into URLSearchParams, so the round happens here. 
+ expect(quantizeCoord(0.1 + 0.2, 2)).toBe('0.30'); + }); + + it('handles negative coordinates (southern hemisphere, western longitudes)', () => { + expect(quantizeCoord('-33.86412345', 2)).toBe('-33.86'); + expect(quantizeCoord('-118.40531234', 2)).toBe('-118.41'); + }); + + it('PUBLIC_FOCUS_DECIMALS quantizes the privacy claim (~1.1 km)', () => { + // 0.01° latitude ≈ 1.11 km. The constant must be 2 — if anyone + // bumps it to 3, the privacy claim ("focus point hidden to ~1km") + // silently changes. + expect(PUBLIC_FOCUS_DECIMALS).toBe(2); + }); + + it('PUBLIC_REVERSE_DECIMALS quantizes the privacy claim (~110 m)', () => { + // 0.001° latitude ≈ 111 m. Same reasoning — lock the value. + expect(PUBLIC_REVERSE_DECIMALS).toBe(3); + }); +}); diff --git a/services/mana-geocoding/src/lib/__tests__/sensitive-query.test.ts b/services/mana-geocoding/src/lib/__tests__/sensitive-query.test.ts new file mode 100644 index 000000000..50b1e0f33 --- /dev/null +++ b/services/mana-geocoding/src/lib/__tests__/sensitive-query.test.ts @@ -0,0 +1,103 @@ +/** + * Tests for the sensitive-query detector. The shape of the regex list is + * the privacy claim — false negatives (a sensitive query slipping through + * to public APIs) are the biggest risk, so this file leans heavily on + * positive cases for the categories we care about. False positives get a + * lighter pass: we test that obvious non-matches don't trigger but we + * accept some over-blocking on edge phrases (a 0-result UX hit is much + * better than leaking medical search to a third party). 
+ */ + +import { describe, expect, it } from 'bun:test'; +import { isSensitiveQuery } from '../sensitive-query'; + +describe('isSensitiveQuery — health professionals', () => { + it('matches single doctor terms', () => { + expect(isSensitiveQuery('Hausarzt Konstanz').sensitive).toBe(true); + expect(isSensitiveQuery('Frauenarzt München').sensitive).toBe(true); + expect(isSensitiveQuery('Kinderarzt Berlin').sensitive).toBe(true); + expect(isSensitiveQuery('Zahnärztin Hamburg').sensitive).toBe(false); // dental — not in narrow list + }); + + it('matches specialist terms regardless of case', () => { + expect(isSensitiveQuery('PSYCHIATER').sensitive).toBe(true); + expect(isSensitiveQuery('Urologe').sensitive).toBe(true); + expect(isSensitiveQuery('Dermatologe').sensitive).toBe(true); + expect(isSensitiveQuery('gynäkologe').sensitive).toBe(true); + expect(isSensitiveQuery('Onkologe Konstanz').sensitive).toBe(true); + }); + + it('reports the matched token for logging (not exposed to client)', () => { + const result = isSensitiveQuery('Praxis Hausarzt Müller'); + expect(result.sensitive).toBe(true); + expect(result.matchedToken?.toLowerCase()).toBe('hausarzt'); + }); +}); + +describe('isSensitiveQuery — clinics + mental health', () => { + it('matches Klinikum / Hospiz / Reha-Klinik', () => { + expect(isSensitiveQuery('Klinikum Konstanz').sensitive).toBe(true); + expect(isSensitiveQuery('Hospiz Stuttgart').sensitive).toBe(true); + expect(isSensitiveQuery('Reha-Klinik Bayern').sensitive).toBe(true); + // Plain "Krankenhaus" is too common as a street name — we keep it + // out of the list (documented trade-off in sensitive-query.ts) + expect(isSensitiveQuery('Krankenhausstraße 5').sensitive).toBe(false); + }); + + it('matches Psycho* terms', () => { + expect(isSensitiveQuery('Psychiatrie Konstanz').sensitive).toBe(true); + expect(isSensitiveQuery('Psychotherapie Berlin').sensitive).toBe(true); + expect(isSensitiveQuery('Psychotherapeutin München').sensitive).toBe(true); + 
expect(isSensitiveQuery('Psychologe').sensitive).toBe(true); + }); +}); + +describe('isSensitiveQuery — addiction / sexual / crisis services', () => { + it('matches addiction services', () => { + expect(isSensitiveQuery('Suchtberatung Konstanz').sensitive).toBe(true); + expect(isSensitiveQuery('Drogenberatung').sensitive).toBe(true); + expect(isSensitiveQuery('Methadon-Ambulanz').sensitive).toBe(true); + }); + + it('matches sexual / reproductive health', () => { + expect(isSensitiveQuery('HIV-Test Berlin').sensitive).toBe(true); + expect(isSensitiveQuery('Schwangerschaftsabbruch').sensitive).toBe(true); + expect(isSensitiveQuery('pro familia').sensitive).toBe(true); + }); + + it('matches crisis / domestic-violence services', () => { + expect(isSensitiveQuery('Frauenhaus Konstanz').sensitive).toBe(true); + expect(isSensitiveQuery('Telefonseelsorge').sensitive).toBe(true); + expect(isSensitiveQuery('Krisendienst München').sensitive).toBe(true); + }); +}); + +describe('isSensitiveQuery — false positives we explicitly accept', () => { + it('does NOT match generic non-medical queries', () => { + expect(isSensitiveQuery('Konstanz Hauptbahnhof').sensitive).toBe(false); + expect(isSensitiveQuery('Münsterplatz 5').sensitive).toBe(false); + expect(isSensitiveQuery('Edeka Konstanz').sensitive).toBe(false); + expect(isSensitiveQuery('Restaurant Konzil').sensitive).toBe(false); + expect(isSensitiveQuery('Cafe Münster').sensitive).toBe(false); + }); + + it('does NOT match street names that contain medical-sounding tokens', () => { + // Word boundaries: "hausarztstrasse" must not match "hausarzt" + expect(isSensitiveQuery('Hausarztstraße 12').sensitive).toBe(false); + // "Klinikstraße" → no clinic token by itself in the list ('klinikum' is) + expect(isSensitiveQuery('Klinikstraße 5').sensitive).toBe(false); + }); + + it('does NOT match Apotheke (pharmacy is too commonly a landmark)', () => { + // Documented trade-off: Apotheke isn't in the list because most + // users searching 
for it want the building as a landmark. Health + // inference from a pharmacy lookup is weaker than from a clinic. + expect(isSensitiveQuery('Apotheke am Markt').sensitive).toBe(false); + }); + + it('handles edge cases gracefully', () => { + expect(isSensitiveQuery('').sensitive).toBe(false); + expect(isSensitiveQuery('a').sensitive).toBe(false); // too short + expect(isSensitiveQuery(' ').sensitive).toBe(false); + }); +}); diff --git a/services/mana-geocoding/src/lib/cache.ts b/services/mana-geocoding/src/lib/cache.ts index c8f9b9a46..6954312a7 100644 --- a/services/mana-geocoding/src/lib/cache.ts +++ b/services/mana-geocoding/src/lib/cache.ts @@ -34,7 +34,16 @@ export class LRUCache { return entry.value; } - set(key: string, value: T): void { + /** + * Insert or update a cache entry. + * + * @param ttlOverrideMs Optional per-entry TTL. Useful when results + * from public-API providers should live longer than results from + * the (frequently-changing) local Pelias index — e.g. 7 days for + * Photon/Nominatim answers, 24 hours for Pelias answers. When + * omitted, the constructor's default TTL applies. + */ + set(key: string, value: T, ttlOverrideMs?: number): void { // Delete first so re-insert goes to end this.map.delete(key); @@ -44,9 +53,10 @@ export class LRUCache { if (oldest !== undefined) this.map.delete(oldest); } + const ttl = ttlOverrideMs ?? this.ttlMs; this.map.set(key, { value, - expiresAt: Date.now() + this.ttlMs, + expiresAt: Date.now() + ttl, }); } diff --git a/services/mana-geocoding/src/lib/privacy.ts b/services/mana-geocoding/src/lib/privacy.ts new file mode 100644 index 000000000..19e45e106 --- /dev/null +++ b/services/mana-geocoding/src/lib/privacy.ts @@ -0,0 +1,45 @@ +/** + * Privacy helpers for outbound public-API requests. + * + * The provider chain quantizes coordinates before forwarding to public + * geocoding endpoints (Photon, Nominatim) so the user's precise position + * doesn't end up in a third party's logs. 
/** ~1.1 km resolution. Enough for "results near me" biasing. */
export const PUBLIC_FOCUS_DECIMALS = 2;

/** ~110 m resolution. Identifies city block, not building. */
export const PUBLIC_REVERSE_DECIMALS = 3;

/**
 * A complete plain-decimal number (optional sign, optional fraction,
 * optional exponent). Used to reject strings that merely START with a
 * number, which `parseFloat` would silently accept.
 */
const DECIMAL_NUMBER = /^[+-]?(?:\d+\.?\d*|\.\d+)(?:e[+-]?\d+)?$/i;

/**
 * Round a coordinate to `decimals` decimal places.
 *
 * Accepts a string or a number. Strings are trimmed and must be a
 * complete decimal number: input such as `"47,66"` (decimal comma) or
 * `"47.66abc"` yields `undefined` instead of being cut down to its
 * numeric prefix — a prefix parse would turn `"47,66"` into `"47.00"`,
 * i.e. a well-formed but wrong coordinate.
 *
 * @param value    Coordinate as string or number; `undefined`, `null`
 *                 and `''` are treated as "no coordinate".
 * @param decimals Number of fraction digits to keep (passed to
 *                 `Number.prototype.toFixed`).
 * @returns The rounded coordinate as a fixed-point string, or
 *          `undefined` when the input is missing, malformed, `NaN` or
 *          infinite. A string is returned so the rounded text can be
 *          used as-is by the caller.
 */
export function quantizeCoord(
	value: string | number | undefined,
	decimals: number
): string | undefined {
	if (value == null || value === '') return undefined;

	let n: number;
	if (typeof value === 'string') {
		const trimmed = value.trim();
		// Whole-string check: never forward a "best effort" prefix parse.
		if (!DECIMAL_NUMBER.test(trimmed)) return undefined;
		n = Number(trimmed);
	} else {
		n = value;
	}

	// Guards NaN / ±Infinity — toFixed would stringify them as-is.
	if (!Number.isFinite(n)) return undefined;
	return n.toFixed(decimals);
}
These should + * never be forwarded to a public geocoding endpoint, where the query content + * (paired with our backend's IP and timestamp) could feed an aggregate + * location-intent profile. + * + * Behavior in the chain: + * - Sensitive query → only providers with `privacy: 'local'` are tried + * - If no local provider answers → return empty results + notice + * (rather than silently fall through to public APIs) + * + * Trade-offs: + * - False positives are OK (a user searching for "Klinikum" as a + * tram-stop name gets 0 results when Pelias is down — not + * ideal but better than a privacy leak) + * - False negatives are NOT OK (we'd rather over-block than under-block) + * - The list is intentionally narrow: only words with clear medical or + * crisis-service intent. Generic "Beratung" / "Hilfe" are not blocked + * because they're too broad — most aren't health-related. + * + * We DON'T block: + * - "Apotheke" — too common as a landmark / street name in DACH + * - "Krankenhaus" alone (often a street name); only specific clinic + * names like "Klinikum X" trigger + * - LGBTQ+ / religion / financial-distress keywords — those would + * need their own UX (the user often wants those results found), + * and false positives there would hurt more than help. + */ + +/** + * Word-boundary patterns. Each must match the FULL token (so + * "hausarztstrasse" doesn't trigger). German umlauts are covered by the + * explicit letter class `[a-zäöüß]` since `\w` and `\b` are ASCII-only + * in JavaScript regex. + * + * The `(?:^|[^a-zäöüß])` and `(?:[^a-zäöüß]|$)` non-capturing groups + * simulate word boundaries (they consume the neighbouring character). 
/**
 * Sensitive-keyword patterns, one regex per category.
 *
 * Every pattern has the shape
 * `(?:^|[^a-zäöüß])(term|term|…)(?:[^a-zäöüß]|$)`: the alternation in
 * capture group 1 must be delimited on both sides by the string edge or
 * by a character that is not a German letter. That makes each term match
 * only as a whole token ("hausarztstrasse" does not match "hausarzt").
 * The delimiters are consuming non-capturing groups, not lookarounds;
 * that is fine here because only the first hit per pattern is needed.
 */
const SENSITIVE_PATTERNS: RegExp[] = [
	// Doctors / specialist medical professionals. Masculine and feminine
	// forms are both listed because whole-token matching means "hausarzt"
	// does not cover "hausärztin".
	/(?:^|[^a-zäöüß])(arzt|ärztin|ärzte|hausarzt|hausärztin|kinderarzt|kinderärztin|frauenarzt|frauenärztin|gynäkologe|gynäkologin|urologe|urologin|dermatologe|dermatologin|orthopäde|orthopädin|augenarzt|augenärztin|hno-?arzt|hno-?ärztin|psychiater|psychiaterin|neurologe|neurologin|kardiologe|kardiologin|onkologe|onkologin|radiologe|radiologin)(?:[^a-zäöüß]|$)/i,

	// Clinic / hospice / psychotherapy terms. A bare "Klinikum" matches,
	// with or without a name after it.
	/(?:^|[^a-zäöüß])(klinikum|hospiz|reha-?klinik|psychiatrie|psychotherapie|psychotherapeut|psychotherapeutin|psychologe|psychologin|therapeutin|therapeut)(?:[^a-zäöüß]|$)/i,

	// Substance / addiction services
	/(?:^|[^a-zäöüß])(suchtberatung|drogenberatung|alkoholberatung|methadon|substitution)(?:[^a-zäöüß]|$)/i,

	// Sexual / reproductive health.
	/(?:^|[^a-zäöüß])(hiv|aids|geschlechtskrank|schwangerschaftsabbruch|abtreibung|sexualberatung)(?:[^a-zäöüß]|$)/i,
	// "pro familia" (German pregnancy-counselling org) is a two-word name,
	// so it gets its own pattern: matched with a space, a hyphen, or fused.
	/(?:^|[^a-zäöüß])(pro[ -]?familia)(?:[^a-zäöüß]|$)/i,

	// Crisis / domestic-violence services
	/(?:^|[^a-zäöüß])(opferschutz|frauenhaus|männerhaus|gewaltschutz|telefonseelsorge|krisendienst|krisentelefon)(?:[^a-zäöüß]|$)/i,
];

export interface SensitivityCheck {
	sensitive: boolean;
	/** Which token matched, for logging. Not exposed to clients. */
	matchedToken?: string;
}

/**
 * Classify a free-text search query as privacy-sensitive or not.
 *
 * @param q Raw query string as typed by the user.
 * @returns `{ sensitive: true, matchedToken }` when any pattern in
 *          `SENSITIVE_PATTERNS` matches (first match wins, in list
 *          order); `{ sensitive: false }` otherwise, including for empty
 *          input and queries shorter than 3 characters.
 */
export function isSensitiveQuery(q: string): SensitivityCheck {
	if (!q || q.length < 3) return { sensitive: false };

	// Compose to NFC before matching. The patterns contain precomposed
	// umlauts (ä = U+00E4); a query arriving in decomposed form
	// (a + U+0308) would otherwise slip past every umlaut keyword — a
	// false negative.
	const normalized = q.normalize('NFC').toLowerCase();

	for (const pattern of SENSITIVE_PATTERNS) {
		const match = pattern.exec(normalized);
		if (match) {
			// match[1] is the captured group — the actual sensitive token.
			return { sensitive: true, matchedToken: match[1] };
		}
	}
	return { sensitive: false };
}
const chain = new ProviderChain({ + providers: [localPelias, publicPhoton, publicNominatim], + healthCacheMs: 60_000, + }); + + const res = await chain.search(SEARCH, undefined, { localOnly: true }); + + expect(res.ok).toBe(true); + expect(res.provider).toBe('pelias'); + expect(localPelias.calls.search).toBe(1); + // Public providers must not even have their search() called + expect(publicPhoton.calls.search).toBe(0); + expect(publicNominatim.calls.search).toBe(0); + }); + + it('falls back to the second LOCAL provider when the first local fails', async () => { + const local1 = new FakeProvider('pelias', { + privacy: 'local', + search: async () => ({ ok: false, kind: 'unreachable' }), + }); + // Pretend we have a hypothetical second local provider + const local2 = new FakeProvider('photon', { privacy: 'local' }); + const chain = new ProviderChain({ + providers: [local1, local2], + healthCacheMs: 60_000, + }); + + const res = await chain.search(SEARCH, undefined, { localOnly: true }); + expect(res.ok).toBe(true); + expect(res.provider).toBe('photon'); + }); + + it('returns ok:true with empty results + sensitive_local_unavailable when no local provider works', async () => { + // All public, all healthy — but we asked for localOnly. The chain + // must NOT silently fall through to public providers. + const public1 = new FakeProvider('photon', { privacy: 'public' }); + const public2 = new FakeProvider('nominatim', { privacy: 'public' }); + const chain = new ProviderChain({ + providers: [public1, public2], + healthCacheMs: 60_000, + }); + + const res = await chain.search(SEARCH, undefined, { localOnly: true }); + + // The privacy contract is the load-bearing assertion: a sensitive + // query must NEVER reach a public provider, even if every local + // provider was filtered out. 
+ expect(public1.calls.search).toBe(0); + expect(public2.calls.search).toBe(0); + expect(public1.calls.health).toBe(0); // not even probed + expect(public2.calls.health).toBe(0); + + expect(res.ok).toBe(true); + expect(res.results).toEqual([]); + expect(res.notice).toBe('sensitive_local_unavailable'); + expect(res.tried).toEqual([]); + }); + + it('returns notice: fallback_used when a public provider serves a non-sensitive query', async () => { + const localDown = new FakeProvider('pelias', { + privacy: 'local', + health: async () => false, + }); + const publicUp = new FakeProvider('photon', { privacy: 'public' }); + const chain = new ProviderChain({ + providers: [localDown, publicUp], + healthCacheMs: 60_000, + }); + + const res = await chain.search(SEARCH); + expect(res.provider).toBe('photon'); + expect(res.notice).toBe('fallback_used'); + }); + + it('NO notice when the local provider serves a non-sensitive query', async () => { + const localUp = new FakeProvider('pelias', { privacy: 'local' }); + const chain = new ProviderChain({ providers: [localUp], healthCacheMs: 60_000 }); + const res = await chain.search(SEARCH); + expect(res.provider).toBe('pelias'); + expect(res.notice).toBeUndefined(); + }); +}); diff --git a/services/mana-geocoding/src/providers/chain.ts b/services/mana-geocoding/src/providers/chain.ts index c55be87eb..07407f8c7 100644 --- a/services/mana-geocoding/src/providers/chain.ts +++ b/services/mana-geocoding/src/providers/chain.ts @@ -38,6 +38,25 @@ interface HealthEntry { checkedAt: number; } +/** + * Notice codes — surfaced to the route layer so the API response can carry + * a hint to the UI (e.g. badge a result as "approximate" or explain why + * a sensitive query returned 0 hits). + */ +export type ChainNotice = + /** Sensitive query was blocked from public providers and no local + * provider was healthy → no results, but the absence is intentional. 
*/ + | 'sensitive_local_unavailable' + /** A non-Pelias provider served the request (Pelias was down). */ + | 'fallback_used'; + +export interface ChainOptions { + /** When true, only providers with `privacy: 'local'` are tried. + * Set this for queries that match the sensitive-keyword list so we + * don't leak medical / crisis-service queries to public endpoints. */ + localOnly?: boolean; +} + export interface ChainResponse { ok: boolean; provider?: ProviderName; @@ -45,6 +64,8 @@ export interface ChainResponse { /** Names of providers that were tried but failed before we got a hit. * Useful for telemetry (`x-geocoding-tried` response header). */ tried: ProviderName[]; + /** Optional UX hint — see `ChainNotice` for the meanings. */ + notice?: ChainNotice; } export class ProviderChain { @@ -52,17 +73,26 @@ export class ProviderChain { constructor(private readonly config: ChainConfig) {} - async search(req: SearchRequest, signal?: AbortSignal): Promise { - return this.run(req, signal, (p, r, s) => p.search(r as SearchRequest, s)); + async search( + req: SearchRequest, + signal?: AbortSignal, + options: ChainOptions = {} + ): Promise { + return this.run(req, signal, options, (p, r, s) => p.search(r as SearchRequest, s)); } - async reverse(req: ReverseRequest, signal?: AbortSignal): Promise { - return this.run(req, signal, (p, r, s) => p.reverse(r as ReverseRequest, s)); + async reverse( + req: ReverseRequest, + signal?: AbortSignal, + options: ChainOptions = {} + ): Promise { + return this.run(req, signal, options, (p, r, s) => p.reverse(r as ReverseRequest, s)); } private async run( req: SearchRequest | ReverseRequest, signal: AbortSignal | undefined, + options: ChainOptions, call: ( provider: GeocodingProvider, req: SearchRequest | ReverseRequest, @@ -71,7 +101,16 @@ export class ProviderChain { ): Promise { const tried: ProviderName[] = []; - for (const provider of this.config.providers) { + // Filter providers up front: in local-only mode (sensitive query), + // 
drop everything with `privacy: 'public'` BEFORE we even probe + // health. This guarantees a sensitive query can never reach a + // public endpoint, even on a tight race window between health + // caching and provider iteration. + const candidates = options.localOnly + ? this.config.providers.filter((p) => p.privacy === 'local') + : this.config.providers; + + for (const provider of candidates) { if (!(await this.isHealthy(provider, signal))) { continue; } @@ -81,7 +120,8 @@ export class ProviderChain { if (result.ok) { // Success — even if results=[], that's a definitive answer. - return { ok: true, provider: provider.name, results: result.results, tried }; + const notice = provider.privacy === 'public' ? ('fallback_used' as const) : undefined; + return { ok: true, provider: provider.name, results: result.results, tried, notice }; } // Failure — mark unhealthy and fall through. @@ -93,6 +133,18 @@ export class ProviderChain { }); } + // All candidates failed (or the sensitive-query filter left us with + // none). 
Distinguish the two so the UI can show different copy: + // - "no results found" (generic chain failure) + // - "this search stays local — currently unavailable" (sensitive) + if (options.localOnly) { + return { + ok: true, + results: [], + tried, + notice: 'sensitive_local_unavailable', + }; + } return { ok: false, results: [], tried }; } diff --git a/services/mana-geocoding/src/providers/nominatim.ts b/services/mana-geocoding/src/providers/nominatim.ts index 37e5edd25..590da3bc5 100644 --- a/services/mana-geocoding/src/providers/nominatim.ts +++ b/services/mana-geocoding/src/providers/nominatim.ts @@ -15,6 +15,7 @@ */ import { mapOsmTagToPlaceCategory } from '../lib/osm-category-map'; +import { PUBLIC_REVERSE_DECIMALS, quantizeCoord } from '../lib/privacy'; import type { RateLimiter } from '../lib/rate-limiter'; import type { GeocodingProvider, @@ -32,6 +33,7 @@ export interface NominatimConfig { export class NominatimProvider implements GeocodingProvider { readonly name = 'nominatim' as const; + readonly privacy = 'public' as const; constructor( private readonly config: NominatimConfig, @@ -46,6 +48,9 @@ export class NominatimProvider implements GeocodingProvider { limit: String(req.limit), 'accept-language': req.lang, }); + // Nominatim doesn't have a focus param, but it accepts a viewbox. + // We don't currently use it — the user's location stays out of the + // query entirely, which is the safer default. try { const json = await this.limiter.run( @@ -66,9 +71,16 @@ export class NominatimProvider implements GeocodingProvider { } async reverse(req: ReverseRequest, signal?: AbortSignal): Promise { + // Quantize to ~110 m so we never log the user's exact GPS to a + // public endpoint. City-block resolution is enough for "what's + // near me?" — and reverse geocoding is the most identifying + // query type (a precise lat/lon often equals a precise home + // address). 
+ const qLat = quantizeCoord(req.lat, PUBLIC_REVERSE_DECIMALS); + const qLon = quantizeCoord(req.lon, PUBLIC_REVERSE_DECIMALS); const params = new URLSearchParams({ - lat: req.lat, - lon: req.lon, + lat: qLat ?? req.lat, + lon: qLon ?? req.lon, format: 'json', addressdetails: '1', 'accept-language': req.lang, diff --git a/services/mana-geocoding/src/providers/pelias.ts b/services/mana-geocoding/src/providers/pelias.ts index ad3c703c2..4c0a5d0f3 100644 --- a/services/mana-geocoding/src/providers/pelias.ts +++ b/services/mana-geocoding/src/providers/pelias.ts @@ -22,6 +22,7 @@ export interface PeliasConfig { export class PeliasProvider implements GeocodingProvider { readonly name = 'pelias' as const; + readonly privacy = 'local' as const; constructor(private readonly config: PeliasConfig) {} diff --git a/services/mana-geocoding/src/providers/photon.ts b/services/mana-geocoding/src/providers/photon.ts index c0d4be113..508ad2cb6 100644 --- a/services/mana-geocoding/src/providers/photon.ts +++ b/services/mana-geocoding/src/providers/photon.ts @@ -17,6 +17,7 @@ */ import { mapOsmTagToPlaceCategory } from '../lib/osm-category-map'; +import { PUBLIC_FOCUS_DECIMALS, PUBLIC_REVERSE_DECIMALS, quantizeCoord } from '../lib/privacy'; import type { GeocodingProvider, GeocodingResult, @@ -32,6 +33,7 @@ export interface PhotonConfig { export class PhotonProvider implements GeocodingProvider { readonly name = 'photon' as const; + readonly privacy = 'public' as const; constructor(private readonly config: PhotonConfig) {} @@ -41,9 +43,16 @@ export class PhotonProvider implements GeocodingProvider { limit: String(req.limit), lang: req.lang, }); - if (req.focusLat && req.focusLon) { - params.set('lat', req.focusLat); - params.set('lon', req.focusLon); + // Quantize the user's focus point before forwarding. 
Photon biases + // results toward "near this lat/lon"; we don't need to send the + // user's exact GPS — 2 decimals (~1.1 km) is enough for the bias + // to work and keeps the user's home/workplace coords out of + // Photon's logs. + const qLat = quantizeCoord(req.focusLat, PUBLIC_FOCUS_DECIMALS); + const qLon = quantizeCoord(req.focusLon, PUBLIC_FOCUS_DECIMALS); + if (qLat && qLon) { + params.set('lat', qLat); + params.set('lon', qLon); } try { @@ -63,9 +72,15 @@ export class PhotonProvider implements GeocodingProvider { async reverse(req: ReverseRequest, signal?: AbortSignal): Promise { // Photon expects lon + lat, not point.lat/point.lon. Easy footgun. + // Quantize to ~110 m so we don't reverse-geocode the user's exact + // front door — city-block resolution is enough for the Places UI's + // "What's near me?" use case, and we never want to log a precise + // home location to a third party. + const qLat = quantizeCoord(req.lat, PUBLIC_REVERSE_DECIMALS); + const qLon = quantizeCoord(req.lon, PUBLIC_REVERSE_DECIMALS); const params = new URLSearchParams({ - lat: req.lat, - lon: req.lon, + lat: qLat ?? req.lat, + lon: qLon ?? req.lon, lang: req.lang, }); diff --git a/services/mana-geocoding/src/providers/types.ts b/services/mana-geocoding/src/providers/types.ts index 89d343b55..0408e788d 100644 --- a/services/mana-geocoding/src/providers/types.ts +++ b/services/mana-geocoding/src/providers/types.ts @@ -75,6 +75,15 @@ export type ProviderResponse = export interface GeocodingProvider { readonly name: ProviderName; + /** + * Privacy stance: + * - `'local'`: backend runs on our infrastructure, query content + * never leaves our network. Eligible for sensitive queries. + * - `'public'`: backend is a public third-party API. The query + * content + our outbound IP are visible to that third party. + * Skipped when the chain is in local-only mode (sensitive query). 
+ */ + readonly privacy: 'local' | 'public'; search(req: SearchRequest, signal?: AbortSignal): Promise; reverse(req: ReverseRequest, signal?: AbortSignal): Promise; /** Cheap probe — `true` means the backend is reachable right now. diff --git a/services/mana-geocoding/src/routes/geocode.ts b/services/mana-geocoding/src/routes/geocode.ts index 5fb4acead..24c0f7077 100644 --- a/services/mana-geocoding/src/routes/geocode.ts +++ b/services/mana-geocoding/src/routes/geocode.ts @@ -10,12 +10,29 @@ import { Hono } from 'hono'; import type { Config } from '../config'; import { LRUCache } from '../lib/cache'; -import type { ProviderChain } from '../providers/chain'; +import { isSensitiveQuery } from '../lib/sensitive-query'; +import type { ChainNotice, ProviderChain } from '../providers/chain'; import type { GeocodingResult, ProviderName } from '../providers/types'; interface CachedAnswer { results: GeocodingResult[]; provider: ProviderName | undefined; + notice?: ChainNotice; +} + +/** + * TTL chooser. Public-API results (Photon/Nominatim) get the longer TTL — + * caching aggressively is the main privacy lever once the query has + * already left our network. Local results stay on the shorter TTL because + * the Pelias index can be re-imported; we don't want stale local data. + * + * Sensitive-query notices are cached on the short TTL too (the user might + * retry from a different angle quickly), and `undefined` provider (chain + * served-empty case) defaults to local TTL. 
/**
 * Pick the cache TTL for an answer from the provider that produced it.
 *
 * @param provider Name of the provider that served the answer, or
 *                 `undefined` when no provider did.
 * @param config   Service configuration; only `config.cache` is read.
 * @returns `config.cache.publicTtlMs` for answers from `photon` or
 *          `nominatim`; `config.cache.ttlMs` for every other provider
 *          and for `undefined`.
 */
function ttlFor(provider: ProviderName | undefined, config: Config): number {
	switch (provider) {
		case 'photon':
		case 'nominatim':
			return config.cache.publicTtlMs;
		default:
			return config.cache.ttlMs;
	}
}
{ notice: response.notice } : {}), }); }); @@ -85,19 +121,33 @@ export function createGeocodeRoutes(config: Config, chain: ProviderChain) { results: cached.results, cached: true, provider: cached.provider, + ...(cached.notice ? { notice: cached.notice } : {}), }); } + // Reverse geocoding has no query string to classify, so no + // sensitive-keyword check applies — the privacy lever here is the + // quantization that happens inside the public providers (Photon + // and Nominatim round to ~110 m before forwarding). const response = await chain.reverse({ lat: roundedLat, lon: roundedLon, lang }); if (!response.ok) { return c.json({ results: [], error: 'geocoding_unavailable', tried: response.tried }, 502); } - reverseCache.set(cacheKey, { results: response.results, provider: response.provider }); + reverseCache.set( + cacheKey, + { + results: response.results, + provider: response.provider, + notice: response.notice, + }, + ttlFor(response.provider, config) + ); return c.json({ results: response.results, provider: response.provider, tried: response.tried, + ...(response.notice ? { notice: response.notice } : {}), }); });