From b5d55fdb21ab369bd64a22017820fd454aef08ef Mon Sep 17 00:00:00 2001 From: Till JS Date: Sat, 18 Apr 2026 15:30:46 +0200 Subject: [PATCH] =?UTF-8?q?feat(events):=20add=20Event=20Discovery=20?= =?UTF-8?q?=E2=80=94=20Phase=201=20+=202?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 1: Manual iCal feeds + Discovery tab - 5 new DB tables in event_discovery schema (regions, interests, sources, discovered_events, user_actions) - iCal parser (node-ical) with deduplication (SHA-256 hash) - Crawl scheduler (15-min interval, auto-deactivate after 5 errors) - CRUD routes for regions, interests, sources + paginated feed endpoint - Frontend: "Meine Events" / "Entdecken" tab navigation in ListView - Discovery setup wizard (regions via mana-geocoding + interests) - DiscoveredEventCard with save/dismiss, SourceManager for iCal feeds - "Merken" creates a local socialEvent from discovered event Phase 2: Auto source discovery + LLM extraction + relevance scoring - Source discoverer: web search via mana-research to auto-find iCal feeds and venue websites for a region - Website extractor: crawl via mana-research /extract, then LLM-based event extraction via mana-llm with structured JSON output - Flexible date parsing (ISO, DD.MM.YYYY), markdown fence stripping - Relevance scorer: category match, freetext match, haversine distance, time proximity, weekend bonus (0-100 clamped) - Routes: POST regions/:id/discover-sources, PUT/DELETE sources/:id/activate|reject - Frontend: "Automatisch finden" button, suggested vs active sources UI 107 tests (all passing), no regressions. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- .../src/lib/modules/events/ListView.svelte | 128 ++-- .../components/DiscoveredEventCard.svelte | 204 +++++ .../events/components/DiscoverySetup.svelte | 169 +++++ .../events/components/DiscoveryTab.svelte | 188 +++++ .../events/components/RegionPicker.svelte | 241 ++++++ .../events/components/SourceManager.svelte | 384 ++++++++++ .../src/lib/modules/events/discovery/api.ts | 170 +++++ .../modules/events/discovery/store.svelte.ts | 206 +++++ .../src/lib/modules/events/discovery/types.ts | 72 ++ docs/plans/event-discovery.md | 711 ++++++++++++++++++ pnpm-lock.yaml | 44 ++ services/mana-events/drizzle.config.ts | 2 +- services/mana-events/package.json | 5 +- .../src/__tests__/deduplicator.test.ts | 80 ++ .../src/__tests__/discovery.test.ts | 389 ++++++++++ services/mana-events/src/__tests__/helpers.ts | 8 + .../src/__tests__/ical-parser.test.ts | 188 +++++ .../src/__tests__/ical-real-feeds.test.ts | 130 ++++ .../mana-events/src/__tests__/scorer.test.ts | 133 ++++ .../src/__tests__/website-extractor.test.ts | 144 ++++ services/mana-events/src/app.ts | 7 + services/mana-events/src/config.ts | 5 + .../mana-events/src/db/schema/discovery.ts | 144 ++++ services/mana-events/src/db/schema/index.ts | 1 + .../src/discovery/crawl-scheduler.ts | 279 +++++++ .../mana-events/src/discovery/deduplicator.ts | 27 + .../mana-events/src/discovery/ical-parser.ts | 96 +++ services/mana-events/src/discovery/scorer.ts | 80 ++ .../src/discovery/source-discoverer.ts | 187 +++++ services/mana-events/src/discovery/types.ts | 50 ++ .../src/discovery/website-extractor.ts | 236 ++++++ services/mana-events/src/index.ts | 7 + .../mana-events/src/routes/discovery-feed.ts | 136 ++++ services/mana-events/src/routes/discovery.ts | 299 ++++++++ 34 files changed, 5105 insertions(+), 45 deletions(-) create mode 100644 apps/mana/apps/web/src/lib/modules/events/components/DiscoveredEventCard.svelte create mode 100644 
apps/mana/apps/web/src/lib/modules/events/components/DiscoverySetup.svelte create mode 100644 apps/mana/apps/web/src/lib/modules/events/components/DiscoveryTab.svelte create mode 100644 apps/mana/apps/web/src/lib/modules/events/components/RegionPicker.svelte create mode 100644 apps/mana/apps/web/src/lib/modules/events/components/SourceManager.svelte create mode 100644 apps/mana/apps/web/src/lib/modules/events/discovery/api.ts create mode 100644 apps/mana/apps/web/src/lib/modules/events/discovery/store.svelte.ts create mode 100644 apps/mana/apps/web/src/lib/modules/events/discovery/types.ts create mode 100644 docs/plans/event-discovery.md create mode 100644 services/mana-events/src/__tests__/deduplicator.test.ts create mode 100644 services/mana-events/src/__tests__/discovery.test.ts create mode 100644 services/mana-events/src/__tests__/ical-parser.test.ts create mode 100644 services/mana-events/src/__tests__/ical-real-feeds.test.ts create mode 100644 services/mana-events/src/__tests__/scorer.test.ts create mode 100644 services/mana-events/src/__tests__/website-extractor.test.ts create mode 100644 services/mana-events/src/db/schema/discovery.ts create mode 100644 services/mana-events/src/discovery/crawl-scheduler.ts create mode 100644 services/mana-events/src/discovery/deduplicator.ts create mode 100644 services/mana-events/src/discovery/ical-parser.ts create mode 100644 services/mana-events/src/discovery/scorer.ts create mode 100644 services/mana-events/src/discovery/source-discoverer.ts create mode 100644 services/mana-events/src/discovery/types.ts create mode 100644 services/mana-events/src/discovery/website-extractor.ts create mode 100644 services/mana-events/src/routes/discovery-feed.ts create mode 100644 services/mana-events/src/routes/discovery.ts diff --git a/apps/mana/apps/web/src/lib/modules/events/ListView.svelte b/apps/mana/apps/web/src/lib/modules/events/ListView.svelte index a530690fb..0eb7e1f2d 100644 --- 
a/apps/mana/apps/web/src/lib/modules/events/ListView.svelte +++ b/apps/mana/apps/web/src/lib/modules/events/ListView.svelte @@ -4,6 +4,7 @@ import { eventsStore } from './stores/events.svelte'; import { drainTombstones } from './tombstones'; import EventCard from './components/EventCard.svelte'; + import DiscoveryTab from './components/DiscoveryTab.svelte'; import type { SocialEvent } from './types'; import type { ViewProps } from '$lib/app-registry'; @@ -13,6 +14,8 @@ const past = usePastEvents(); const guestsByEvent = useGuestsByEvent(); + let activeTab = $state<'mine' | 'discover'>('mine'); + // Retry any orphaned server snapshots from previous failed deletes. onMount(() => { void drainTombstones(); @@ -60,55 +63,72 @@
-
-

- {(upcoming.value ?? []).length} bevorstehend · {(past.value ?? []).length} vergangen -

- -
+ +
- {#if showCreate} -
- -
- - - -
- -
- {/if} + {#if activeTab === 'mine'} +
+

+ {(upcoming.value ?? []).length} bevorstehend · {(past.value ?? []).length} vergangen +

+ +
-
-

Bevorstehend

- {#if (upcoming.value ?? []).length === 0} -

Keine bevorstehenden Events. Zeit für eine Party?

- {:else} -
- {#each upcoming.value ?? [] as event (event.id)} - {@const summary = summarizeRsvps(guestsByEvent.value?.get(event.id) ?? [])} - open(event)} /> - {/each} -
+ {#if showCreate} +
+ +
+ + + +
+ +
{/if} -
- {#if (past.value ?? []).length > 0}
-

Vergangen

-
- {#each past.value ?? [] as event (event.id)} - open(event)} /> - {/each} -
+

Bevorstehend

+ {#if (upcoming.value ?? []).length === 0} +

Keine bevorstehenden Events. Zeit für eine Party?

+ {:else} +
+ {#each upcoming.value ?? [] as event (event.id)} + {@const summary = summarizeRsvps(guestsByEvent.value?.get(event.id) ?? [])} + open(event)} /> + {/each} +
+ {/if}
+ + {#if (past.value ?? []).length > 0} +
+

Vergangen

+
+ {#each past.value ?? [] as event (event.id)} + open(event)} /> + {/each} +
+
+ {/if} + {:else} + {/if} @@ -121,6 +141,30 @@ max-width: 880px; margin: 0 auto; } + .tab-bar { + display: flex; + gap: 0; + border-bottom: 1px solid hsl(var(--color-border)); + } + .tab { + padding: 0.5rem 1rem; + border: none; + background: none; + font-size: 0.875rem; + font-weight: 500; + color: hsl(var(--color-muted-foreground)); + cursor: pointer; + border-bottom: 2px solid transparent; + margin-bottom: -1px; + font-family: inherit; + } + .tab.active { + color: hsl(var(--color-foreground)); + border-bottom-color: hsl(var(--color-primary)); + } + .tab:hover:not(.active) { + color: hsl(var(--color-foreground)); + } .events-header { display: flex; align-items: flex-start; diff --git a/apps/mana/apps/web/src/lib/modules/events/components/DiscoveredEventCard.svelte b/apps/mana/apps/web/src/lib/modules/events/components/DiscoveredEventCard.svelte new file mode 100644 index 000000000..844e6c7c6 --- /dev/null +++ b/apps/mana/apps/web/src/lib/modules/events/components/DiscoveredEventCard.svelte @@ -0,0 +1,204 @@ + + +
+
+
{dateLabel}
+
{timeLabel}
+
+
+
+

{event.title}

+ {#if event.category} + {event.category} + {/if} +
+ {#if event.location} +
{event.location}
+ {/if} + {#if event.priceInfo} +
{event.priceInfo}
+ {/if} + {#if event.description} +
+ {event.description.slice(0, 150)}{event.description.length > 150 ? '...' : ''} +
+ {/if} + +
+
+ + diff --git a/apps/mana/apps/web/src/lib/modules/events/components/DiscoverySetup.svelte b/apps/mana/apps/web/src/lib/modules/events/components/DiscoverySetup.svelte new file mode 100644 index 000000000..b4c484132 --- /dev/null +++ b/apps/mana/apps/web/src/lib/modules/events/components/DiscoverySetup.svelte @@ -0,0 +1,169 @@ + + +
+

Event-Entdeckung einrichten

+ + {#if step === 1} +
+

Welche Regionen sollen nach Events durchsucht werden?

+ + +
+ {:else} +
+

Was interessiert dich?

+
+ {#each EVENT_CATEGORIES as cat} + + {/each} +
+ +
+ + +
+
+ {/if} +
+ + diff --git a/apps/mana/apps/web/src/lib/modules/events/components/DiscoveryTab.svelte b/apps/mana/apps/web/src/lib/modules/events/components/DiscoveryTab.svelte new file mode 100644 index 000000000..480d663f5 --- /dev/null +++ b/apps/mana/apps/web/src/lib/modules/events/components/DiscoveryTab.svelte @@ -0,0 +1,188 @@ + + +
+ {#if !initialized} +

Lade...

+ {:else if !discoveryStore.isSetUp} + + {:else} +
+ +
+ + +
+
+ + {#if showSources} + + {/if} + + {#if discoveryStore.loading} +

Lade Events...

+ {:else if discoveryStore.error} +

{discoveryStore.error}

+ {:else if discoveryStore.feed.length === 0} +
+

Noch keine Events gefunden

+

+ Füge iCal-Feeds von Venues oder Vereinen hinzu, um Events zu entdecken. +

+ {#if !showSources} + + {/if} +
+ {:else} +
+ {#each discoveryStore.feed as event (event.id)} + handleSave(event.id)} + onDismiss={() => handleDismiss(event.id)} + /> + {/each} + {#if discoveryStore.feedHasMore} + + {/if} +
+ {/if} + {/if} +
+ + diff --git a/apps/mana/apps/web/src/lib/modules/events/components/RegionPicker.svelte b/apps/mana/apps/web/src/lib/modules/events/components/RegionPicker.svelte new file mode 100644 index 000000000..b3bdccf6f --- /dev/null +++ b/apps/mana/apps/web/src/lib/modules/events/components/RegionPicker.svelte @@ -0,0 +1,241 @@ + + +
+
+ {#each regions as region (region.id)} +
+ {region.label} + {region.radiusKm} km + +
+ {/each} + {#if !showForm} + + {/if} +
+ + {#if showForm} +
+ +
+ + +
+ {#if suggestions.length > 0} +
    + {#each suggestions as s} +
  • + +
  • + {/each} +
+ {/if} + {#if searching} +

Suche...

+ {/if} + +
+ {/if} +
+ + diff --git a/apps/mana/apps/web/src/lib/modules/events/components/SourceManager.svelte b/apps/mana/apps/web/src/lib/modules/events/components/SourceManager.svelte new file mode 100644 index 000000000..267476694 --- /dev/null +++ b/apps/mana/apps/web/src/lib/modules/events/components/SourceManager.svelte @@ -0,0 +1,384 @@ + + +
+
+

Quellen

+
+ + {#if !showForm} + + {/if} +
+
+ + {#if showForm} +
+ + + {#if regions.length > 1} + + {/if} +
+ + +
+
+ {/if} + + {#if suggestedSources.length > 0} +
+

Vorgeschlagene Quellen

+
+ {#each suggestedSources as source (source.id)} +
+
+
{source.name}
+
+ {source.type.toUpperCase()} + {#if source.url} + · {new URL(source.url).hostname} + {/if} +
+
+
+ + +
+
+ {/each} +
+
+ {/if} + + {#if activeSources.length === 0 && suggestedSources.length === 0} +

+ Noch keine Quellen. Nutze "Automatisch finden" oder füge iCal-Feeds manuell hinzu. +

+ {:else if activeSources.length > 0} +
+ {#each activeSources as source (source.id)} +
0}> +
+
{source.name}
+
+ {source.type.toUpperCase()} · Letzter Scan: {formatDate(source.lastCrawledAt)} + {#if source.errorCount > 0} + {source.errorCount} Fehler + {/if} +
+ {#if source.lastError} +
{source.lastError}
+ {/if} +
+
+ + +
+
+ {/each} +
+ {/if} +
+ + diff --git a/apps/mana/apps/web/src/lib/modules/events/discovery/api.ts b/apps/mana/apps/web/src/lib/modules/events/discovery/api.ts new file mode 100644 index 000000000..9af3f8cc2 --- /dev/null +++ b/apps/mana/apps/web/src/lib/modules/events/discovery/api.ts @@ -0,0 +1,170 @@ +/** + * Discovery HTTP client — JWT-authenticated calls to mana-events discovery endpoints. + */ + +import { authStore } from '$lib/stores/auth.svelte'; +import { getManaEventsUrl } from '$lib/api/config'; +import type { DiscoveryRegion, DiscoveryInterest, DiscoverySource, DiscoveredEvent } from './types'; + +async function fetchWithAuth(path: string, init: RequestInit = {}): Promise { + const token = await authStore.getValidToken(); + const res = await fetch(`${getManaEventsUrl()}${path}`, { + ...init, + headers: { + 'Content-Type': 'application/json', + ...(token ? { Authorization: `Bearer ${token}` } : {}), + ...init.headers, + }, + }); + if (!res.ok) { + const err = await res.json().catch(() => ({ message: 'Request failed' })); + throw new Error(err.message || `HTTP ${res.status}`); + } + return res.json() as Promise; +} + +// ─── Regions ──────────────────────────────────────────────────────── + +export async function getRegions(): Promise { + const { regions } = await fetchWithAuth<{ regions: DiscoveryRegion[] }>( + '/api/v1/discovery/regions' + ); + return regions; +} + +export async function createRegion(input: { + label: string; + lat: number; + lon: number; + radiusKm?: number; +}): Promise { + const { region } = await fetchWithAuth<{ region: DiscoveryRegion }>('/api/v1/discovery/regions', { + method: 'POST', + body: JSON.stringify(input), + }); + return region; +} + +export async function updateRegion( + id: string, + input: { label?: string; radiusKm?: number; isActive?: boolean } +): Promise { + const { region } = await fetchWithAuth<{ region: DiscoveryRegion }>( + `/api/v1/discovery/regions/${id}`, + { method: 'PUT', body: JSON.stringify(input) } + ); + return region; +} + 
+export async function deleteRegion(id: string): Promise { + await fetchWithAuth(`/api/v1/discovery/regions/${id}`, { method: 'DELETE' }); +} + +// ─── Interests ────────────────────────────────────────────────────── + +export async function getInterests(): Promise { + const { interests } = await fetchWithAuth<{ interests: DiscoveryInterest[] }>( + '/api/v1/discovery/interests' + ); + return interests; +} + +export async function createInterest(input: { + category: string; + freetext?: string | null; + weight?: number; +}): Promise { + const { interest } = await fetchWithAuth<{ interest: DiscoveryInterest }>( + '/api/v1/discovery/interests', + { method: 'POST', body: JSON.stringify(input) } + ); + return interest; +} + +export async function deleteInterest(id: string): Promise { + await fetchWithAuth(`/api/v1/discovery/interests/${id}`, { method: 'DELETE' }); +} + +// ─── Sources ──────────────────────────────────────────────────────── + +export async function getSources(): Promise { + const { sources } = await fetchWithAuth<{ sources: DiscoverySource[] }>( + '/api/v1/discovery/sources' + ); + return sources; +} + +export async function createSource(input: { + type: 'ical' | 'website'; + url: string; + name: string; + regionId: string; + crawlIntervalHours?: number; +}): Promise { + const { source } = await fetchWithAuth<{ source: DiscoverySource }>('/api/v1/discovery/sources', { + method: 'POST', + body: JSON.stringify(input), + }); + return source; +} + +export async function deleteSource(id: string): Promise { + await fetchWithAuth(`/api/v1/discovery/sources/${id}`, { method: 'DELETE' }); +} + +export async function crawlSourceNow(id: string): Promise<{ upserted: number; error?: string }> { + return fetchWithAuth(`/api/v1/discovery/sources/${id}/crawl`, { method: 'POST' }); +} + +export async function activateSource(id: string): Promise { + const { source } = await fetchWithAuth<{ source: DiscoverySource }>( + `/api/v1/discovery/sources/${id}/activate`, + { 
method: 'PUT' } + ); + return source; +} + +export async function rejectSource(id: string): Promise { + await fetchWithAuth(`/api/v1/discovery/sources/${id}/reject`, { method: 'DELETE' }); +} + +export async function discoverSources( + regionId: string +): Promise<{ suggestedCount: number; queries: number; searchResults: number }> { + return fetchWithAuth(`/api/v1/discovery/regions/${regionId}/discover-sources`, { + method: 'POST', + }); +} + +// ─── Feed ─────────────────────────────────────────────────────────── + +export interface FeedParams { + from?: string; + to?: string; + category?: string; + limit?: number; + offset?: number; + hideDismissed?: boolean; +} + +export async function getFeed( + params: FeedParams = {} +): Promise<{ events: DiscoveredEvent[]; total: number; hasMore: boolean }> { + const searchParams = new URLSearchParams(); + if (params.from) searchParams.set('from', params.from); + if (params.to) searchParams.set('to', params.to); + if (params.category) searchParams.set('category', params.category); + if (params.limit) searchParams.set('limit', String(params.limit)); + if (params.offset) searchParams.set('offset', String(params.offset)); + if (params.hideDismissed) searchParams.set('hideDismissed', 'true'); + + const qs = searchParams.toString(); + return fetchWithAuth(`/api/v1/discovery/feed${qs ? `?${qs}` : ''}`); +} + +export async function setEventAction(eventId: string, action: 'save' | 'dismiss'): Promise { + await fetchWithAuth(`/api/v1/discovery/feed/${eventId}/action`, { + method: 'POST', + body: JSON.stringify({ action }), + }); +} diff --git a/apps/mana/apps/web/src/lib/modules/events/discovery/store.svelte.ts b/apps/mana/apps/web/src/lib/modules/events/discovery/store.svelte.ts new file mode 100644 index 000000000..259e2d043 --- /dev/null +++ b/apps/mana/apps/web/src/lib/modules/events/discovery/store.svelte.ts @@ -0,0 +1,206 @@ +/** + * Discovery store — reactive state for regions, interests, sources, and feed. 
+ * + * Server-authoritative: all reads fetch from mana-events, no Dexie. + * State is held in Svelte 5 runes ($state) and refreshed on mount / mutation. + */ + +import * as api from './api'; +import type { DiscoveryRegion, DiscoveryInterest, DiscoverySource, DiscoveredEvent } from './types'; +import { eventsStore } from '../stores/events.svelte'; + +// ─── State ────────────────────────────────────────────────────────── + +let regions = $state([]); +let interests = $state([]); +let sources = $state([]); +let feed = $state([]); +let feedHasMore = $state(false); +let loading = $state(false); +let error = $state(null); + +// ─── Loaders ──────────────────────────────────────────────────────── + +async function loadRegions() { + try { + regions = await api.getRegions(); + } catch (e) { + console.error('[discovery] failed to load regions:', e); + } +} + +async function loadInterests() { + try { + interests = await api.getInterests(); + } catch (e) { + console.error('[discovery] failed to load interests:', e); + } +} + +async function loadSources() { + try { + sources = await api.getSources(); + } catch (e) { + console.error('[discovery] failed to load sources:', e); + } +} + +async function loadFeed(params: api.FeedParams = {}) { + loading = true; + error = null; + try { + const result = await api.getFeed({ hideDismissed: true, ...params }); + feed = result.events; + feedHasMore = result.hasMore; + } catch (e) { + error = e instanceof Error ? 
e.message : 'Fehler beim Laden'; + console.error('[discovery] failed to load feed:', e); + } finally { + loading = false; + } +} + +// ─── Exported store ───────────────────────────────────────────────── + +export const discoveryStore = { + // Reactive getters + get regions() { + return regions; + }, + get interests() { + return interests; + }, + get sources() { + return sources; + }, + get feed() { + return feed; + }, + get feedHasMore() { + return feedHasMore; + }, + get loading() { + return loading; + }, + get error() { + return error; + }, + get isSetUp() { + return regions.length > 0; + }, + + // ── Init ───────────────────────────────────────────────────── + async init() { + await Promise.all([loadRegions(), loadInterests(), loadSources()]); + if (regions.length > 0) { + await loadFeed(); + } + }, + + async refreshFeed(params?: api.FeedParams) { + await loadFeed(params); + }, + + // ── Regions ────────────────────────────────────────────────── + async addRegion(input: { label: string; lat: number; lon: number; radiusKm?: number }) { + const region = await api.createRegion(input); + regions = [...regions, region]; + return region; + }, + + async updateRegion(id: string, input: { label?: string; radiusKm?: number; isActive?: boolean }) { + const region = await api.updateRegion(id, input); + regions = regions.map((r) => (r.id === id ? 
region : r)); + return region; + }, + + async removeRegion(id: string) { + await api.deleteRegion(id); + regions = regions.filter((r) => r.id !== id); + }, + + // ── Interests ──────────────────────────────────────────────── + async addInterest(input: { category: string; freetext?: string | null; weight?: number }) { + const interest = await api.createInterest(input); + interests = [...interests, interest]; + return interest; + }, + + async removeInterest(id: string) { + await api.deleteInterest(id); + interests = interests.filter((i) => i.id !== id); + }, + + // ── Sources ────────────────────────────────────────────────── + async addSource(input: { + type: 'ical' | 'website'; + url: string; + name: string; + regionId: string; + crawlIntervalHours?: number; + }) { + const source = await api.createSource(input); + sources = [...sources, source]; + // Trigger immediate crawl + api + .crawlSourceNow(source.id) + .then(() => loadFeed()) + .catch(() => {}); + return source; + }, + + async removeSource(id: string) { + await api.deleteSource(id); + sources = sources.filter((s) => s.id !== id); + }, + + async crawlSource(id: string) { + const result = await api.crawlSourceNow(id); + await loadSources(); + await loadFeed(); + return result; + }, + + async activateSource(id: string) { + const source = await api.activateSource(id); + sources = sources.map((s) => (s.id === id ? source : s)); + }, + + async rejectSource(id: string) { + await api.rejectSource(id); + sources = sources.filter((s) => s.id !== id); + }, + + async discoverSources(regionId: string) { + const result = await api.discoverSources(regionId); + await loadSources(); // refresh to include new suggestions + return result; + }, + + // ── Feed Actions ───────────────────────────────────────────── + async saveEvent(eventId: string) { + const event = feed.find((e) => e.id === eventId); + if (!event) return; + + await api.setEventAction(eventId, 'save'); + feed = feed.map((e) => (e.id === eventId ? 
{ ...e, userAction: 'save' as const } : e)); + + // Create a local socialEvent from the discovered event + const startMs = new Date(event.startAt).getTime(); + const fallbackEnd = new Date(startMs + 2 * 60 * 60 * 1000).toISOString(); + await eventsStore.createEvent({ + title: event.title, + startTime: event.startAt, + endTime: event.endAt ?? fallbackEnd, + location: event.location ?? undefined, + description: event.description + ? `${event.description}\n\nQuelle: ${event.sourceUrl}` + : `Quelle: ${event.sourceUrl}`, + }); + }, + + async dismissEvent(eventId: string) { + await api.setEventAction(eventId, 'dismiss'); + feed = feed.filter((e) => e.id !== eventId); + }, +}; diff --git a/apps/mana/apps/web/src/lib/modules/events/discovery/types.ts b/apps/mana/apps/web/src/lib/modules/events/discovery/types.ts new file mode 100644 index 000000000..26cbba623 --- /dev/null +++ b/apps/mana/apps/web/src/lib/modules/events/discovery/types.ts @@ -0,0 +1,72 @@ +/** + * Discovery types — shared between API client, queries, and UI components. 
+ */ + +export interface DiscoveryRegion { + id: string; + label: string; + lat: number; + lon: number; + radiusKm: number; + isActive: boolean; + createdAt: string; +} + +export interface DiscoveryInterest { + id: string; + category: string; + freetext: string | null; + weight: number; + createdAt: string; +} + +export interface DiscoverySource { + id: string; + type: 'ical' | 'website'; + url: string | null; + name: string; + regionId: string | null; + crawlIntervalHours: number; + lastCrawledAt: string | null; + lastSuccessAt: string | null; + errorCount: number; + lastError: string | null; + isActive: boolean; + createdAt: string; + updatedAt: string; +} + +export interface DiscoveredEvent { + id: string; + title: string; + description: string | null; + location: string | null; + lat: number | null; + lon: number | null; + startAt: string; + endAt: string | null; + allDay: boolean; + imageUrl: string | null; + sourceUrl: string; + sourceName: string | null; + category: string | null; + priceInfo: string | null; + crawledAt: string; + userAction: 'save' | 'dismiss' | null; +} + +export const EVENT_CATEGORIES = [ + { id: 'music', label: 'Musik' }, + { id: 'theater', label: 'Theater' }, + { id: 'art', label: 'Kunst' }, + { id: 'tech', label: 'Tech' }, + { id: 'sport', label: 'Sport' }, + { id: 'food', label: 'Kulinarik' }, + { id: 'family', label: 'Familie' }, + { id: 'nature', label: 'Natur' }, + { id: 'education', label: 'Bildung' }, + { id: 'community', label: 'Community' }, + { id: 'nightlife', label: 'Nachtleben' }, + { id: 'market', label: 'Markt' }, + { id: 'other', label: 'Sonstiges' }, +] as const; diff --git a/docs/plans/event-discovery.md b/docs/plans/event-discovery.md new file mode 100644 index 000000000..2db7ad281 --- /dev/null +++ b/docs/plans/event-discovery.md @@ -0,0 +1,711 @@ +# Event Discovery — Implementierungsplan + +## Status (2026-04-17) + +Planung, noch kein Code. 
+ +## Ziel + +Eine KI im Events-Modul, die automatisch öffentliche Veranstaltungen in den Regionen des Nutzers findet, strukturiert und als kuratierten Feed vorschlägt. Der Nutzer konfiguriert Städte/Gebiete + Interessen; das System scannt Event-Kalender, Venue-Websites und Vereinsseiten, dedupliziert und rankt nach Relevanz. + +## Abgrenzung + +- **Eigene Events** (socialEvents, RSVP, Bring-Liste) bleiben unberührt — Discovery ist ein paralleler Read-only-Feed +- **mana-research** wird als Provider-Schicht genutzt (Web-Suche, Extraktion), aber Discovery-Logik lebt in mana-events +- **mana-crawler** wird NICHT direkt genutzt — Firecrawl/Jina über mana-research reichen für Event-Extraktion +- **mana-ai Missions** kommen erst in Phase 3 — Phase 1-2 läuft als dedizierter Cron/API ohne Mission-Runner + +--- + +## Architektur + +``` +Nutzer (Events-Modul, Tab "Entdecken") + │ + ▼ + apps/mana web ──→ mana-events (3065) + │ + ┌────────────┼────────────────┐ + ▼ ▼ ▼ + Discovery API Source Manager Crawl Scheduler + │ │ │ + ▼ ▼ ▼ + PostgreSQL mana-research mana-geocoding + (event_discovery (Web-Suche, (Region → + Schema) Extraktion) BoundingBox) +``` + +### Neue DB-Tabellen (PostgreSQL, Schema `event_discovery` in `mana_platform`) + +```sql +-- Quellen, die regelmäßig gescannt werden +discovery_sources + id uuid PK + user_id text NOT NULL -- Besitzer + type text NOT NULL -- 'ical' | 'website' | 'api' | 'search_query' + url text -- Feed-URL oder Website-URL + name text NOT NULL -- "Jazzhaus Freiburg", "VHS Konstanz" + region_id uuid FK → discovery_regions + crawl_interval_hours int DEFAULT 24 + last_crawled_at timestamptz + last_success_at timestamptz + error_count int DEFAULT 0 + last_error text + is_active boolean DEFAULT true + created_at timestamptz DEFAULT now() + updated_at timestamptz DEFAULT now() + +-- Regionen des Nutzers +discovery_regions + id uuid PK + user_id text NOT NULL + label text NOT NULL -- "Freiburg", "Basel" + lat double precision + lon double precision 
+ radius_km int DEFAULT 25 + is_active boolean DEFAULT true + created_at timestamptz DEFAULT now() + +-- Nutzer-Interessen für Relevanz-Scoring +discovery_interests + id uuid PK + user_id text NOT NULL + category text NOT NULL -- 'music' | 'tech' | 'sport' | 'art' | ... + freetext text -- "Impro-Theater", "Rust Meetups" + weight real DEFAULT 1.0 -- Nutzer kann priorisieren + created_at timestamptz DEFAULT now() + +-- Gefundene Events (dedupliziert, normalisiert) +discovered_events + id uuid PK + source_id uuid FK → discovery_sources (CASCADE) + external_id text -- Dedupe-Anker (URL oder Hash) + dedupe_hash text NOT NULL -- sha256(lower(title) + date + location) + title text NOT NULL + description text + location text + lat double precision + lon double precision + start_at timestamptz NOT NULL + end_at timestamptz + all_day boolean DEFAULT false + image_url text + source_url text NOT NULL -- Link zur Original-Seite + source_name text -- "Jazzhaus Freiburg" + category text -- LLM-klassifiziert + price_info text -- "Frei", "12 €", "VVK 15 / AK 18" + raw_extracted jsonb -- Rohdaten der LLM-Extraktion + crawled_at timestamptz DEFAULT now() + expires_at timestamptz -- start_at + 1 Tag (für Cleanup) + UNIQUE(dedupe_hash) -- Idempotenz + +-- Nutzer-Interaktion mit entdeckten Events +discovery_user_actions + id uuid PK + user_id text NOT NULL + event_id uuid FK → discovered_events (CASCADE) + action text NOT NULL -- 'save' | 'dismiss' | 'hide_source' + acted_at timestamptz DEFAULT now() + UNIQUE(user_id, event_id) + +-- Indizes +CREATE INDEX idx_discovered_events_start ON discovered_events(start_at); +CREATE INDEX idx_discovered_events_source ON discovered_events(source_id); +CREATE INDEX idx_discovery_sources_user ON discovery_sources(user_id, is_active); +CREATE INDEX idx_discovery_regions_user ON discovery_regions(user_id); +CREATE INDEX idx_discovery_actions_user ON discovery_user_actions(user_id); +``` + +### Lokale Tabellen (Dexie) — nur Cache + Offline + 
+Discovery-Daten sind **nicht** local-first (sie entstehen auf dem Server). Dexie dient nur als Offline-Cache: + +``` +discoveryRegions — id, label, lat, lon, radiusKm, isActive +discoveryInterests — id, category, freetext, weight +discoveredEvents — id, title, description, location, lat, lon, + startAt, endAt, sourceUrl, sourceName, + imageUrl, category, priceInfo, relevanceScore, + userAction (null|save|dismiss), crawledAt +``` + +Keine Verschlüsselung nötig — das sind öffentliche Event-Daten, keine User-Inhalte. + +Kein Sync über mana-sync — der Server ist die Source of Truth, Client pollt/cached. + +--- + +## Phase 1 — Regionen, iCal-Feeds, Discovery-Tab + +**Ziel:** Nutzer kann Regionen + iCal-Feeds manuell konfigurieren. Events werden geparst und im "Entdecken"-Tab angezeigt. + +### 1.1 Backend: DB-Schema + CRUD-Routen (mana-events) + +**Dateien:** + +``` +services/mana-events/src/db/schema/ + discovery.ts ← NEU: Drizzle-Schema für alle 5 Tabellen + +services/mana-events/src/routes/ + discovery.ts ← NEU: CRUD für regions, interests, sources + discovery-feed.ts ← NEU: Feed-Endpoint (paginiert, gefiltert) + +services/mana-events/src/app.ts + → Neue Routen registrieren unter /api/v1/discovery/* +``` + +**API-Endpunkte (alle JWT-authentifiziert):** + +``` +# Regionen +GET /api/v1/discovery/regions → [{id, label, lat, lon, radiusKm}] +POST /api/v1/discovery/regions ← {label, lat, lon, radiusKm} +PUT /api/v1/discovery/regions/:id ← {label?, radiusKm?, isActive?} +DELETE /api/v1/discovery/regions/:id + +# Interessen +GET /api/v1/discovery/interests → [{id, category, freetext, weight}] +POST /api/v1/discovery/interests ← {category, freetext?, weight?} +DELETE /api/v1/discovery/interests/:id + +# Quellen (Phase 1: nur iCal) +GET /api/v1/discovery/sources → [{id, type, url, name, region, status}] +POST /api/v1/discovery/sources ← {type: 'ical', url, name, regionId} +DELETE /api/v1/discovery/sources/:id +POST /api/v1/discovery/sources/:id/crawl → Sofort-Crawl auslösen + 
+# Feed +GET /api/v1/discovery/feed → {events: [...], total, hasMore} + ?from=ISO&to=ISO&category=music&limit=20&offset=0 +POST /api/v1/discovery/feed/:eventId/action ← {action: 'save' | 'dismiss'} +``` + +### 1.2 Backend: iCal-Parser + Crawl-Loop + +**Dateien:** + +``` +services/mana-events/src/discovery/ + ical-parser.ts ← iCal → discovered_events (ical.js oder node-ical) + crawl-scheduler.ts ← Interval-basierter Crawl-Loop (wie rateBucketSweeper) + deduplicator.ts ← sha256(lower(title) + startAt.toISODate() + lower(location)) + types.ts ← NormalizedEvent, CrawlResult, SourceStatus +``` + +**Ablauf eines Crawl-Zyklus:** + +``` +crawl-scheduler.ts (runs every 15 min) + │ + ├─ SELECT sources WHERE is_active AND last_crawled_at < now() - interval_hours + │ + ├─ Für jede fällige Source: + │ ├─ fetch(source.url) mit 10s Timeout + │ ├─ ical-parser.ts: VEVENT → NormalizedEvent[] + │ ├─ deduplicator.ts: dedupe_hash berechnen + │ ├─ UPSERT INTO discovered_events ON CONFLICT(dedupe_hash) + │ │ → Bestehende: title/description/location updaten falls geändert + │ │ → Neue: INSERT + │ ├─ UPDATE source SET last_crawled_at, last_success_at, error_count=0 + │ └─ Bei Fehler: error_count++, last_error setzen + │ → Nach 5 Fehlern: is_active = false (Nutzer wird informiert) + │ + └─ DELETE FROM discovered_events WHERE expires_at < now() + (Cleanup abgelaufener Events) +``` + +**Dependency:** `node-ical` (Bun-kompatibel, ~50KB, parst VEVENT/VTODO/VFREEBUSY) + +### 1.3 Frontend: Discovery-Tab + Regionen-Setup + +**Dateien:** + +``` +apps/mana/apps/web/src/lib/modules/events/ + discovery/ + api.ts ← HTTP-Client (fetchWithAuth, analog events/api.ts) + types.ts ← DiscoveredEvent, DiscoveryRegion, DiscoveryInterest, etc. 
+ queries.svelte.ts ← Reactive state ($state) für Feed, Regionen, Interessen + stores.svelte.ts ← Mutationen (addRegion, addSource, saveEvent, dismissEvent) + + components/ + DiscoveryTab.svelte ← Der neue Tab-Inhalt + DiscoverySetup.svelte ← Onboarding: Regionen + Interessen konfigurieren + DiscoveredEventCard.svelte ← Karte mit Titel, Datum, Ort, Quelle, Aktionen + SourceManager.svelte ← iCal-Feed-URLs verwalten (hinzufügen, Status, löschen) + RegionPicker.svelte ← Stadt-Suche via mana-geocoding + Radius-Slider + + ListView.svelte ← Tab-Navigation ergänzen: "Meine Events" | "Entdecken" +``` + +**ListView.svelte — Tab-Erweiterung:** + +```svelte + + +
+ + +
+ +{#if activeTab === 'mine'} + +{:else} + +{/if} +``` + +**DiscoveryTab.svelte — Aufbau:** + +```svelte +{#if !hasRegions} + +{:else} +
+ + + +
+ + {#if feed.value.length === 0} +

Noch keine Events gefunden. Füge iCal-Feeds hinzu oder warte auf den nächsten Scan.

+ {:else} + {#each feed.value as event (event.id)} + actions.save(event.id)} + onDismiss={() => actions.dismiss(event.id)} + onOpen={() => window.open(event.sourceUrl, '_blank')} + /> + {/each} + {/if} + + +{/if} +``` + +**DiscoveredEventCard.svelte — Felder:** + +- Titel (fett) +- Datum + Uhrzeit (formatiert, relativ: "Morgen, 19:00" / "Sa 26. Apr, 20:00") +- Ort + Entfernung zum nächsten Region-Zentrum +- Quelle (Link zur Original-Seite) +- Kategorie-Badge (Musik, Tech, Sport, ...) +- Preis-Info falls vorhanden +- Aktionen: "Merken" (→ eigenes socialEvent anlegen), "Nicht interessant", "Zur Quelle" + +**"Merken"-Flow:** + +``` +Nutzer klickt "Merken" + → discoveryStore.saveEvent(discoveredEvent) + → eventsStore.createEvent({ + title: event.title, + startTime: event.startAt, + endTime: event.endAt, + location: event.location, + description: `${event.description}\n\nQuelle: ${event.sourceUrl}`, + locationLat: event.lat, + locationLon: event.lon, + }) + → POST /api/v1/discovery/feed/:eventId/action {action: 'save'} + → Karte zeigt "Gespeichert ✓" +``` + +### 1.4 Module-Integration + +**module.config.ts** — Neue Dexie-Tabellen registrieren (Cache-only, kein Sync): + +```typescript +// events/module.config.ts — erweitert +tables: [ + { name: 'socialEvents', syncName: 'events' }, + { name: 'eventGuests' }, + { name: 'eventInvitations' }, + { name: 'eventItems' }, + // NEU: Discovery-Cache (nicht gesynct, rein lokal) + { name: 'discoveryRegions' }, + { name: 'discoveryInterests' }, + { name: 'discoveredEvents' }, +], +``` + +**database.ts** — Indizes: + +```typescript +discoveryRegions: 'id, isActive', +discoveryInterests: 'id, category', +discoveredEvents: 'id, startAt, category, userAction, [startAt+category]', +``` + +**Keine Encryption-Registry** — öffentliche Daten. 
+ +### 1.5 Deliverables Phase 1 + +- [ ] Drizzle-Schema `event_discovery` + `bun run db:push` +- [ ] CRUD-Routen für Regionen, Interessen, Quellen +- [ ] iCal-Parser mit Dedup + Cleanup +- [ ] Crawl-Scheduler (15-Min-Intervall) +- [ ] Feed-Endpoint (paginiert, nach Datum gefiltert) +- [ ] Frontend: Tab-Navigation in ListView +- [ ] Frontend: DiscoverySetup (Regionen + Interessen) +- [ ] Frontend: RegionPicker mit mana-geocoding Autocomplete +- [ ] Frontend: SourceManager (iCal-URLs CRUD) +- [ ] Frontend: DiscoveredEventCard + Feed-Ansicht +- [ ] Frontend: "Merken" → socialEvent anlegen +- [ ] Tests: iCal-Parser Unit-Tests, Feed-Route Integration-Tests + +--- + +## Phase 2 — Automatische Quellen-Entdeckung + LLM-Extraktion + +**Ziel:** Die KI findet selbst Event-Quellen für die Regionen des Nutzers und extrahiert Events von unstrukturierten Websites. + +### 2.1 Quellen-Entdeckung (Meta-Crawl) + +**Neue Datei:** `services/mana-events/src/discovery/source-discoverer.ts` + +**Ablauf:** + +``` +Nutzer fügt Region "Freiburg" hinzu + │ + ├─ Trigger: source-discoverer.discoverForRegion(region) + │ + ├─ Schritt 1: Web-Suche via mana-research + │ Queries (parallel, via mana-research POST /api/v1/search): + │ "Veranstaltungskalender Freiburg ical" + │ "Events Freiburg 2026" + │ "Kulturzentren Freiburg Programm" + │ "Vereine Freiburg Veranstaltungen" + │ "Konzerte Theater Freiburg Termine" + │ + ├─ Schritt 2: Ergebnisse filtern + │ → URLs die auf .ics enden → Typ 'ical' + │ → URLs mit /kalender, /programm, /events, /veranstaltungen → Typ 'website' + │ → Bekannte Plattformen (eventbrite.*/freiburg, meetup.com/*freiburg) → Typ 'api' + │ + ├─ Schritt 3: LLM-Klassifikation (optional, via mana-llm) + │ Prompt: "Ist diese URL eine Event-Quelle? Wenn ja: Name, Typ, Region." 
+ │ → Filtert Noise (Nachrichtenartikel über Events, generische Stadtseiten) + │ + ├─ Schritt 4: Vorschläge speichern + │ → INSERT INTO discovery_sources (status: 'suggested') + │ → Nutzer sieht Vorschläge im SourceManager und kann aktivieren/ablehnen + │ + └─ Schritt 5: Sofort-Crawl für aktivierte Quellen +``` + +**API-Erweiterung:** + +``` +POST /api/v1/discovery/regions/:id/discover-sources + → Triggert Meta-Crawl, returns {suggestedCount} + +GET /api/v1/discovery/sources?status=suggested + → Vorgeschlagene Quellen die der Nutzer noch bestätigen muss + +PUT /api/v1/discovery/sources/:id/activate +PUT /api/v1/discovery/sources/:id/reject +``` + +### 2.2 Website-Extraktion (LLM-basiert) + +**Neue Datei:** `services/mana-events/src/discovery/website-extractor.ts` + +**Ablauf für Typ `website`:** + +``` +Source: { type: 'website', url: 'https://jazzhaus.de/programm' } + │ + ├─ Schritt 1: Seite crawlen via mana-research + │ POST mana-research /api/v1/extract + │ { url: source.url, provider: 'jina' } (oder 'firecrawl') + │ → Markdown-Text der Seite + │ + ├─ Schritt 2: LLM-Extraktion via mana-llm + │ System-Prompt: + │ "Du bist ein Event-Extractor. Extrahiere ALLE kommenden + │ Veranstaltungen von dieser Seite. Pro Event: + │ - title (string, required) + │ - date (ISO 8601, required) + │ - endDate (ISO 8601, optional) + │ - location (string, optional — Venue-Name + Adresse) + │ - description (string, max 300 Zeichen) + │ - category (music|theater|art|tech|sport|food|family|other) + │ - priceInfo (string, optional — z.B. 'VVK 15€ / AK 18€') + │ - imageUrl (string, optional) + │ Antwort als JSON-Array. Ignoriere vergangene Events. + │ Heutiges Datum: {today}" + │ User-Prompt: + │ → JSON-Array von NormalizedEvents + │ + ├─ Schritt 3: Validierung + Normalisierung + │ → Datum parsen (LLMs machen manchmal "25. 
April 2026" statt ISO) + │ → Geocoding via mana-geocoding falls location vorhanden + │ → dedupe_hash berechnen + │ + └─ Schritt 4: UPSERT INTO discovered_events +``` + +**LLM-Kosten:** ~500-2000 Input-Tokens pro Seite + ~200-500 Output-Tokens. Bei Haiku-Klasse: ~0.001-0.003 $ pro Seite. Bei täglichem Crawl von 50 Quellen: ~$0.05-0.15/Tag. + +### 2.3 Relevanz-Scoring + +**Neue Datei:** `services/mana-events/src/discovery/scorer.ts` + +```typescript +function scoreEvent( + event: DiscoveredEvent, + interests: DiscoveryInterest[], + regions: DiscoveryRegion[], + userActions: Map +): number { + let score = 50; // Basis + + // Kategorie-Match: +20 pro Match mit Nutzer-Interesse (gewichtet) + for (const interest of interests) { + if (event.category === interest.category) score += 20 * interest.weight; + if (interest.freetext && event.title.toLowerCase().includes(interest.freetext.toLowerCase())) + score += 15 * interest.weight; + } + + // Entfernung: -1 pro km über 5km (nah = besser) + const nearestRegion = findNearestRegion(event, regions); + if (nearestRegion) { + const distKm = haversine(event.lat, event.lon, nearestRegion.lat, nearestRegion.lon); + score -= Math.max(0, distKm - 5); + } + + // Zeitnähe: +10 wenn innerhalb 7 Tagen, +5 wenn innerhalb 14 Tagen + const daysUntil = (new Date(event.startAt).getTime() - Date.now()) / 86400000; + if (daysUntil <= 7) score += 10; + else if (daysUntil <= 14) score += 5; + + // Wochenende-Bonus: +5 wenn Sa/So (die meisten Nutzer sind freier) + const dow = new Date(event.startAt).getDay(); + if (dow === 0 || dow === 6) score += 5; + + // Source-Qualität: +5 wenn Source hohe Erfolgsquote hat + // (Phase 3: implizites Feedback aus save/dismiss-Ratio) + + return Math.max(0, Math.min(100, score)); +} +``` + +**Feed-Endpoint erweitert:** `ORDER BY relevance_score DESC, start_at ASC` + +### 2.4 Frontend-Erweiterungen + +**SourceManager.svelte — erweitert:** + +```svelte + + + + +{#if suggestedSources.length > 0} +

Vorgeschlagene Quellen

+ {#each suggestedSources as source} + activateSource(source.id)} + onReject={() => rejectSource(source.id)} + /> + {/each} +{/if} +``` + +**DiscoveredEventCard.svelte — erweitert:** + +- Relevanz-Indikator (farbiger Dot: grün >70, gelb >40, grau ≤40) +- "Warum vorgeschlagen?"-Tooltip (Kategorie-Match, Nähe, Zeitnähe) +- Kategorie-Badge prominenter + +### 2.5 Deliverables Phase 2 + +- [ ] Source-Discoverer: Web-Suche → iCal/Website-URLs → Vorschläge +- [ ] Website-Extractor: Crawl → LLM-Extraktion → normalisierte Events +- [ ] Relevanz-Scorer mit Kategorie/Distanz/Zeit-Gewichtung +- [ ] API: `/discover-sources`, `/activate`, `/reject` +- [ ] Frontend: "Quellen automatisch finden" + Vorschlags-UI +- [ ] Frontend: Relevanz-Indikator + "Warum vorgeschlagen?" +- [ ] Crawl-Scheduler erweitert: Website-Typ + Fehlerhandling +- [ ] Tests: Website-Extractor mit Mock-HTML, Scorer Unit-Tests + +--- + +## Phase 3 — mana-ai Integration + Proaktive Vorschläge + +**Ziel:** Discovery wird zu einem AI-Tool. Mana-AI-Missions können proaktiv Events finden und vorschlagen. + +### 3.1 AI-Tool: `discover_events` + +**In `@mana/shared-ai`:** + +```typescript +{ + name: 'discover_events', + description: 'Suche öffentliche Veranstaltungen in den konfigurierten Regionen des Nutzers', + parameters: { + query: { type: 'string', description: 'Optionaler Suchtext (z.B. "Jazz Konzerte")' }, + category: { type: 'string', description: 'Kategorie-Filter' }, + days_ahead: { type: 'number', description: 'Wie viele Tage voraus (default 14)' }, + }, + defaultPolicy: 'auto', // Read-only, kann im Reasoning-Loop laufen +} +``` + +**Server-side (mana-ai):** Ruft `mana-events /api/v1/discovery/feed` auf, injiziert Ergebnisse als ResolvedInput. 
+ +### 3.2 AI-Tool: `suggest_event` + +```typescript +{ + name: 'suggest_event', + description: 'Schlage dem Nutzer ein entdecktes Event vor (erscheint als Proposal)', + parameters: { + discovered_event_id: { type: 'string', required: true }, + reason: { type: 'string', description: 'Warum dieses Event relevant ist' }, + }, + defaultPolicy: 'propose', // Nutzer muss bestätigen +} +``` + +**Approve-Handler:** Führt den "Merken"-Flow aus (discoveredEvent → socialEvent). + +### 3.3 Proaktive Mission: "Event-Scout" + +Als **Agent-Template** (analog Recherche-Agent): + +```typescript +{ + name: 'Event-Scout', + description: 'Findet regelmäßig Events in deinen Regionen und schlägt passende vor', + defaultMissions: [ + { + objective: 'Prüfe neue Events in meinen Regionen. Schlage die 3-5 relevantesten vor, die ich noch nicht gesehen habe.', + cadence: 'daily', + isPaused: false, + } + ], + policy: { + discover_events: 'auto', + suggest_event: 'propose', + } +} +``` + +### 3.4 Feedback-Loop + +**Implizites Profil aus Nutzer-Aktionen:** + +``` +save_count(category=music) / total_music_shown → music_affinity +dismiss_count(source=X) / total_from_X → source_quality + +→ Gewichtung in scorer.ts anpassen: + - Kategorien mit hoher Affinity: weight * 1.5 + - Quellen mit niedriger Qualität: weight * 0.5 + - Quellen mit >80% dismiss: deaktivieren + Nutzer informieren +``` + +### 3.5 Notifications + +Via `mana-notify`: + +- **Täglicher Digest** (optional): "5 neue Events in Freiburg diese Woche" +- **Highlight-Alert** (optional): Push bei Events mit Score >90 +- **Source-Status**: "iCal-Feed von Jazzhaus ist seit 3 Tagen nicht erreichbar" + +### 3.6 Deliverables Phase 3 + +- [ ] AI-Tools `discover_events` + `suggest_event` in shared-ai + mana-ai +- [ ] Agent-Template "Event-Scout" +- [ ] Feedback-Loop: implizites Profil → Scorer-Gewichtung +- [ ] Notification-Integration (täglicher Digest, Highlight-Alert) +- [ ] Tests: AI-Tool Unit-Tests, Feedback-Aggregation + +--- + +## Phase 4 — 
Event-Plattform-APIs + Social + +**Ziel:** Strukturierte APIs von Event-Plattformen anbinden für höhere Datenqualität. + +### 4.1 Provider-Adapter + +``` +services/mana-events/src/discovery/providers/ + base.ts ← Interface: fetchEvents(region, dateRange) → NormalizedEvent[] + ical.ts ← Bestehender iCal-Parser (refactored) + website.ts ← Bestehender Website-Extractor (refactored) + eventbrite.ts ← Eventbrite API (OAuth, kostenlos für Reads) + meetup.ts ← Meetup GraphQL API + facebook-events.ts ← Meta Graph API (eingeschränkt, braucht App Review) +``` + +### 4.2 Stadt-Portale + +Viele Städte haben halbstrukturierte Event-Kalender: + +- freiburg.de/veranstaltungen → RSS/Atom wo vorhanden, sonst Website-Extractor +- basel.ch/events → Ähnlich +- Tourismus-Seiten (Schwarzwald-Tourismus, Basel-Tourismus) + +→ Diese werden als `type: 'website'` Quellen mit spezifischen Crawl-Hinweisen angelegt. + +### 4.3 Deliverables Phase 4 + +- [ ] Provider-Adapter-Interface + Refactoring bestehender Parser +- [ ] Eventbrite-Provider +- [ ] Meetup-Provider +- [ ] Stadt-Portal-Unterstützung (optimierte Extraktion) + +--- + +## Abhängigkeiten + +| Service | Rolle | Schon vorhanden? 
| +|---------|-------|-------------------| +| mana-events (3065) | Hosting der Discovery-Logik + DB | Ja, wird erweitert | +| mana-research (3068) | Web-Suche + Extraktion | Ja | +| mana-geocoding (3018) | Region-Geocoding + Distanzberechnung | Ja | +| mana-llm | LLM-Aufrufe für Extraktion + Klassifikation | Ja | +| mana-credits | Kosten-Tracking für LLM + Research-Calls | Ja | +| mana-notify (3024) | Push-Notifications für Digests | Ja | +| mana-ai (3067) | Mission-Runner für proaktive Vorschläge | Ja, Phase 3 | + +**Neue npm-Dependencies:** + +- `node-ical` — iCal-Parsing (Phase 1) +- Keine weiteren — alles andere ist über bestehende Services abgedeckt + +--- + +## Risiken + Mitigationen + +| Risiko | Mitigation | +|--------|------------| +| iCal-Feeds kaputt / nicht-standard | Robuster Parser + error_count + Auto-Deaktivierung nach 5 Fehlern | +| LLM-Extraktion unzuverlässig | Structured Output (JSON-Mode), Validierung, Fallback auf Regex-Extraktion für bekannte Formate | +| Zu viele irrelevante Events | Relevanz-Scoring + Dismiss-Feedback + Source-Qualitäts-Tracking | +| Hohe LLM-Kosten bei vielen Quellen | Haiku-Klasse nutzen, Caching (gleiche Seite → kein Re-Extract wenn unverändert), Rate-Limits pro User | +| Geocoding-Ungenauigkeit | Fallback: Events ohne Koordinaten bekommen Region-Zentrum + maximalen Radius | +| DSGVO: öffentliche Events speichern | Events sind öffentlich publiziert, kein personenbezogener Inhalt. User-Actions (save/dismiss) sind personal data → Löschung bei Account-Delete | + +--- + +## Empfehlung + +**Phase 1 zuerst bauen.** Das allein ist schon wertvoll — ein Nutzer, der 10 iCal-Feeds seiner Lieblings-Venues einträgt, bekommt einen aggregierten Event-Feed ohne dass je eine KI laufen muss. Phase 2 macht es dann intelligent (automatische Quellen-Entdeckung + unstrukturierte Seiten). Phase 3 macht es proaktiv (KI schlägt Events vor). Phase 4 ist nice-to-have. 
+ +Geschätzter Aufwand Phase 1: Backend ~1.5 Tage, Frontend ~1.5 Tage, Tests ~0.5 Tage = **~3.5 Tage**. diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index c128a0fcf..1107ae999 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -3463,6 +3463,9 @@ importers: jose: specifier: ^6.1.2 version: 6.2.2 + node-ical: + specifier: ^0.26.0 + version: 0.26.0 postgres: specifier: ^3.4.5 version: 3.4.9 @@ -6788,6 +6791,10 @@ packages: '@jridgewell/trace-mapping@0.3.9': resolution: {integrity: sha512-3Belt6tdc8bPgAtbcmdtNJlirVoTmEb5e2gC94PnkwEW9jI6CAHUeoG85tjWP5WquqfavoMtMwiG4P926ZKKuQ==} + '@js-temporal/polyfill@0.5.1': + resolution: {integrity: sha512-hloP58zRVCRSpgDxmqCWJNlizAlUgJFqG2ypq79DCvyv9tHjRYMDOcPFjzfl/A1/YxDvRCZz8wvZvmapQnKwFQ==} + engines: {node: '>=12'} + '@ljharb/through@2.3.14': resolution: {integrity: sha512-ajBvlKpWucBB17FuQYUShqpqy8GRgYEpJW0vWJbUu1CV9lWyrDCapy0lScU8T8Z6qn49sSwJB3+M+evYIdGg+A==} engines: {node: '>= 0.4'} @@ -13490,6 +13497,9 @@ packages: resolution: {integrity: sha512-qQKT4zQxXl8lLwBtHMWwaTcGfFOZviOJet3Oy/xmGk2gZH677CJM9EvtfdSkgWcATZhj/55JZ0rmy3myCT5lsA==} hasBin: true + jsbi@4.3.2: + resolution: {integrity: sha512-9fqMSQbhJykSeii05nxKl4m6Eqn2P6rOlYiS+C5Dr/HPIU/7yZxu5qzbs40tgaFORiw2Amd0mirjxatXYMkIew==} + jsc-safe-url@0.2.4: resolution: {integrity: sha512-0wM3YBWtYePOjfyXQH5MWQ8H7sdk5EXSwZvmSLKk2RboVQ2Bu239jycHDz5J/8Blf3K0Qnoy2b6xD+z10MFB+Q==} @@ -14553,6 +14563,10 @@ packages: resolution: {integrity: sha512-s+w+rBWnpTMwSFbaE0UXsRlg7hU4FjekKU4eyAih5T8nJuNZT1nNsskXpxmeqSK9UzkBl6UgRlnKc8hz8IEqOw==} hasBin: true + node-ical@0.26.0: + resolution: {integrity: sha512-tJZY2fMb38Gbj0P05zHMWBr90MslhGZ1qEbOWYnokBYPPX/lYskL/0NnWoeiXTBNod+kRRcTOjxAeB20kfvKyw==} + engines: {node: '>=20'} + node-int64@0.4.0: resolution: {integrity: sha512-O5lz91xSOeoXP6DulyHfllpq+Eg00MWitZIbtPfoSEvqIHdl5gfcY6hYzDWnj0qD5tz52PI08u9qUvSVeUBeHw==} @@ -15969,6 +15983,9 @@ packages: resolution: {integrity: 
sha512-nLTrUKm2UyiL7rlhapu/Zl45FwNgkZGaCpZbIHajDYgwlJCOzLSk+cIPAnsEqV955GjILJnKbdQC1nVPz+gAYQ==} engines: {node: '>= 18'} + rrule-temporal@1.5.2: + resolution: {integrity: sha512-I5rAiZfRlMh0vuG23HrGBMLZOSiQO7H1Uq8l9qyfA6oTD5j+UMRwpRs4aVU4XdaFhgN1p3K+cHelG8KvLTTm+g==} + rrule@2.8.1: resolution: {integrity: sha512-hM3dHSBMeaJ0Ktp7W38BJZ7O1zOgaFEsn41PDk+yHoEtfLV+PoJt9E9xAlZiWgf/iqEqionN0ebHFZIDAp+iGw==} @@ -16619,6 +16636,12 @@ packages: resolution: {integrity: sha512-aoBAniQmmwtcKp/7BzsH8Cxzv8OL736p7v1ihGb5e9DJ9kTwGWHrQrVB5+lfVDzfGrdRzXch+ig7LHaY1JTOrg==} engines: {node: '>=8'} + temporal-polyfill@0.3.2: + resolution: {integrity: sha512-TzHthD/heRK947GNiSu3Y5gSPpeUDH34+LESnfsq8bqpFhsB79HFBX8+Z834IVX68P3EUyRPZK5bL/1fh437Eg==} + + temporal-spec@0.3.1: + resolution: {integrity: sha512-B4TUhezh9knfSIMwt7RVggApDRJZo73uZdj8AacL2mZ8RP5KtLianh2MXxL06GN9ESYiIsiuoLQhgVfwe55Yhw==} + tempy@0.6.0: resolution: {integrity: sha512-G13vtMYPT/J8A4X2SjdtBTphZlrp1gKv6hZiOjw14RCWg6GbHuQBGtjlx75xLbYV/wEc0D7G5K4rxKP/cXk8Bw==} engines: {node: '>=10'} @@ -22113,6 +22136,10 @@ snapshots: '@jridgewell/resolve-uri': 3.1.2 '@jridgewell/sourcemap-codec': 1.5.5 + '@js-temporal/polyfill@0.5.1': + dependencies: + jsbi: 4.3.2 + '@ljharb/through@2.3.14': dependencies: call-bind: 1.0.8 @@ -32512,6 +32539,8 @@ snapshots: dependencies: argparse: 2.0.1 + jsbi@4.3.2: {} + jsc-safe-url@0.2.4: {} jsdom@25.0.1: @@ -34094,6 +34123,11 @@ snapshots: detect-libc: 2.1.2 optional: true + node-ical@0.26.0: + dependencies: + rrule-temporal: 1.5.2 + temporal-polyfill: 0.3.2 + node-int64@0.4.0: {} node-mock-http@1.0.4: {} @@ -36082,6 +36116,10 @@ snapshots: transitivePeerDependencies: - supports-color + rrule-temporal@1.5.2: + dependencies: + '@js-temporal/polyfill': 0.5.1 + rrule@2.8.1: dependencies: tslib: 2.8.1 @@ -36876,6 +36914,12 @@ snapshots: temp-dir@2.0.0: {} + temporal-polyfill@0.3.2: + dependencies: + temporal-spec: 0.3.1 + + temporal-spec@0.3.1: {} + tempy@0.6.0: dependencies: is-stream: 2.0.1 diff --git 
a/services/mana-events/drizzle.config.ts b/services/mana-events/drizzle.config.ts index a6404d0f7..ad0810739 100644 --- a/services/mana-events/drizzle.config.ts +++ b/services/mana-events/drizzle.config.ts @@ -7,5 +7,5 @@ export default defineConfig({ dbCredentials: { url: process.env.DATABASE_URL || 'postgresql://mana:devpassword@localhost:5432/mana_platform', }, - schemaFilter: ['events'], + schemaFilter: ['events', 'event_discovery'], }); diff --git a/services/mana-events/package.json b/services/mana-events/package.json index 7b6758020..ca4c3189f 100644 --- a/services/mana-events/package.json +++ b/services/mana-events/package.json @@ -12,10 +12,11 @@ "db:studio": "drizzle-kit studio" }, "dependencies": { - "hono": "^4.7.0", "drizzle-orm": "^0.38.3", - "postgres": "^3.4.5", + "hono": "^4.7.0", "jose": "^6.1.2", + "node-ical": "^0.26.0", + "postgres": "^3.4.5", "zod": "^3.24.0" }, "devDependencies": { diff --git a/services/mana-events/src/__tests__/deduplicator.test.ts b/services/mana-events/src/__tests__/deduplicator.test.ts new file mode 100644 index 000000000..6a01a891b --- /dev/null +++ b/services/mana-events/src/__tests__/deduplicator.test.ts @@ -0,0 +1,80 @@ +/** + * Deduplicator unit tests — no DB required. 
+ */ + +import { describe, it, expect } from 'bun:test'; +import { computeDedupeHash } from '../discovery/deduplicator'; +import type { NormalizedEvent } from '../discovery/types'; + +function makeEvent(overrides: Partial = {}): NormalizedEvent { + return { + title: 'Jazz Night', + startAt: new Date('2026-05-01T19:00:00Z'), + sourceUrl: 'https://example.com/event', + location: 'Jazzhaus Freiburg', + ...overrides, + }; +} + +describe('computeDedupeHash', () => { + it('produces a hex string', async () => { + const hash = await computeDedupeHash(makeEvent()); + expect(hash).toMatch(/^[0-9a-f]{64}$/); + }); + + it('is deterministic (same input = same hash)', async () => { + const a = await computeDedupeHash(makeEvent()); + const b = await computeDedupeHash(makeEvent()); + expect(a).toBe(b); + }); + + it('differs when title changes', async () => { + const a = await computeDedupeHash(makeEvent({ title: 'Jazz Night' })); + const b = await computeDedupeHash(makeEvent({ title: 'Rock Night' })); + expect(a).not.toBe(b); + }); + + it('differs when date changes', async () => { + const a = await computeDedupeHash(makeEvent({ startAt: new Date('2026-05-01T19:00:00Z') })); + const b = await computeDedupeHash(makeEvent({ startAt: new Date('2026-05-02T19:00:00Z') })); + expect(a).not.toBe(b); + }); + + it('differs when location changes', async () => { + const a = await computeDedupeHash(makeEvent({ location: 'Jazzhaus Freiburg' })); + const b = await computeDedupeHash(makeEvent({ location: 'E-Werk Freiburg' })); + expect(a).not.toBe(b); + }); + + it('is case-insensitive (title)', async () => { + const a = await computeDedupeHash(makeEvent({ title: 'Jazz Night' })); + const b = await computeDedupeHash(makeEvent({ title: 'jazz night' })); + expect(a).toBe(b); + }); + + it('is case-insensitive (location)', async () => { + const a = await computeDedupeHash(makeEvent({ location: 'Jazzhaus Freiburg' })); + const b = await computeDedupeHash(makeEvent({ location: 'jazzhaus freiburg' })); + 
expect(a).toBe(b); + }); + + it('treats null and empty location the same', async () => { + const a = await computeDedupeHash(makeEvent({ location: null })); + const b = await computeDedupeHash(makeEvent({ location: '' })); + expect(a).toBe(b); + }); + + it('ignores time-of-day (same calendar date = same hash)', async () => { + const a = await computeDedupeHash(makeEvent({ startAt: new Date('2026-05-01T10:00:00Z') })); + const b = await computeDedupeHash(makeEvent({ startAt: new Date('2026-05-01T22:00:00Z') })); + expect(a).toBe(b); + }); + + it('trims whitespace from title and location', async () => { + const a = await computeDedupeHash( + makeEvent({ title: ' Jazz Night ', location: ' Jazzhaus ' }) + ); + const b = await computeDedupeHash(makeEvent({ title: 'Jazz Night', location: 'Jazzhaus' })); + expect(a).toBe(b); + }); +}); diff --git a/services/mana-events/src/__tests__/discovery.test.ts b/services/mana-events/src/__tests__/discovery.test.ts new file mode 100644 index 000000000..3177ecb8b --- /dev/null +++ b/services/mana-events/src/__tests__/discovery.test.ts @@ -0,0 +1,389 @@ +/** + * Discovery route integration tests. + * + * Tests CRUD for regions, interests, sources, and the feed endpoint. + * Uses the same mock-auth pattern as existing mana-events tests. 
+ */ + +import { describe, it, expect, beforeEach, afterAll } from 'bun:test'; +import { buildTestApp, authedRequest, jsonBody, TEST_USER_ID, OTHER_USER_ID } from './helpers'; + +const app = buildTestApp(); + +const BASE = 'http://test/api/v1/discovery'; + +beforeEach(async () => { + await app.wipe(); +}); + +afterAll(async () => { + await app.wipe(); +}); + +// ─── Helper ───────────────────────────────────────────────────────── + +async function createRegion(label = 'Freiburg', lat = 47.997, lon = 7.842, user = TEST_USER_ID) { + const res = await app.fetch( + authedRequest(`${BASE}/regions`, { + method: 'POST', + body: jsonBody({ label, lat, lon, radiusKm: 25 }), + user, + }) + ); + expect(res.status).toBe(201); + const data = await res.json(); + return data.region; +} + +// ─── Regions ──────────────────────────────────────────────────────── + +describe('Discovery Regions', () => { + it('creates a region', async () => { + const region = await createRegion(); + expect(region.label).toBe('Freiburg'); + expect(region.lat).toBe(47.997); + expect(region.lon).toBe(7.842); + expect(region.radiusKm).toBe(25); + expect(region.isActive).toBe(true); + }); + + it('lists only own regions', async () => { + await createRegion('Freiburg', 47.997, 7.842, TEST_USER_ID); + await createRegion('Basel', 47.559, 7.589, OTHER_USER_ID); + + const res = await app.fetch(authedRequest(`${BASE}/regions`)); + const { regions } = await res.json(); + expect(regions).toHaveLength(1); + expect(regions[0].label).toBe('Freiburg'); + }); + + it('updates a region', async () => { + const region = await createRegion(); + const res = await app.fetch( + authedRequest(`${BASE}/regions/${region.id}`, { + method: 'PUT', + body: jsonBody({ radiusKm: 50, label: 'Freiburg im Breisgau' }), + }) + ); + expect(res.status).toBe(200); + const { region: updated } = await res.json(); + expect(updated.radiusKm).toBe(50); + expect(updated.label).toBe('Freiburg im Breisgau'); + }); + + it('rejects updating another user 
region', async () => { + const region = await createRegion('Basel', 47.559, 7.589, OTHER_USER_ID); + const res = await app.fetch( + authedRequest(`${BASE}/regions/${region.id}`, { + method: 'PUT', + body: jsonBody({ radiusKm: 100 }), + }) + ); + expect(res.status).toBe(404); + }); + + it('deletes a region (cascades to sources)', async () => { + const region = await createRegion(); + const delRes = await app.fetch( + authedRequest(`${BASE}/regions/${region.id}`, { method: 'DELETE' }) + ); + expect(delRes.status).toBe(200); + + const listRes = await app.fetch(authedRequest(`${BASE}/regions`)); + const { regions } = await listRes.json(); + expect(regions).toHaveLength(0); + }); + + it('rejects invalid coordinates', async () => { + const res = await app.fetch( + authedRequest(`${BASE}/regions`, { + method: 'POST', + body: jsonBody({ label: 'Bad', lat: 999, lon: 7.0 }), + }) + ); + expect(res.status).toBe(400); + }); +}); + +// ─── Interests ────────────────────────────────────────────────────── + +describe('Discovery Interests', () => { + it('creates and lists interests', async () => { + const res1 = await app.fetch( + authedRequest(`${BASE}/interests`, { + method: 'POST', + body: jsonBody({ category: 'music' }), + }) + ); + expect(res1.status).toBe(201); + + const res2 = await app.fetch( + authedRequest(`${BASE}/interests`, { + method: 'POST', + body: jsonBody({ category: 'tech', freetext: 'Rust Meetups', weight: 2.0 }), + }) + ); + expect(res2.status).toBe(201); + + const listRes = await app.fetch(authedRequest(`${BASE}/interests`)); + const { interests } = await listRes.json(); + expect(interests).toHaveLength(2); + expect(interests.find((i: { category: string }) => i.category === 'tech').freetext).toBe( + 'Rust Meetups' + ); + expect(interests.find((i: { category: string }) => i.category === 'tech').weight).toBe(2.0); + }); + + it('deletes an interest', async () => { + const createRes = await app.fetch( + authedRequest(`${BASE}/interests`, { + method: 'POST', + 
body: jsonBody({ category: 'sport' }), + }) + ); + const { interest } = await createRes.json(); + + const delRes = await app.fetch( + authedRequest(`${BASE}/interests/${interest.id}`, { method: 'DELETE' }) + ); + expect(delRes.status).toBe(200); + + const listRes = await app.fetch(authedRequest(`${BASE}/interests`)); + const { interests } = await listRes.json(); + expect(interests).toHaveLength(0); + }); +}); + +// ─── Sources ──────────────────────────────────────────────────────── + +describe('Discovery Sources', () => { + it('creates a source linked to a region', async () => { + const region = await createRegion(); + const res = await app.fetch( + authedRequest(`${BASE}/sources`, { + method: 'POST', + body: jsonBody({ + type: 'ical', + url: 'https://example.com/events.ics', + name: 'Test Venue', + regionId: region.id, + }), + }) + ); + expect(res.status).toBe(201); + const { source } = await res.json(); + expect(source.name).toBe('Test Venue'); + expect(source.type).toBe('ical'); + expect(source.regionId).toBe(region.id); + expect(source.isActive).toBe(true); + expect(source.errorCount).toBe(0); + }); + + it('rejects source with invalid region', async () => { + const res = await app.fetch( + authedRequest(`${BASE}/sources`, { + method: 'POST', + body: jsonBody({ + type: 'ical', + url: 'https://example.com/events.ics', + name: 'Bad', + regionId: '00000000-0000-0000-0000-000000000000', + }), + }) + ); + expect(res.status).toBe(400); + }); + + it('rejects source for another user region', async () => { + const region = await createRegion('Basel', 47.559, 7.589, OTHER_USER_ID); + const res = await app.fetch( + authedRequest(`${BASE}/sources`, { + method: 'POST', + body: jsonBody({ + type: 'ical', + url: 'https://example.com/events.ics', + name: 'Sneaky', + regionId: region.id, + }), + }) + ); + expect(res.status).toBe(400); + }); + + it('lists only own sources', async () => { + const region1 = await createRegion('FR', 47.997, 7.842, TEST_USER_ID); + const region2 = 
await createRegion('BS', 47.559, 7.589, OTHER_USER_ID); + + await app.fetch( + authedRequest(`${BASE}/sources`, { + method: 'POST', + body: jsonBody({ + type: 'ical', + url: 'https://a.com/cal.ics', + name: 'A', + regionId: region1.id, + }), + }) + ); + await app.fetch( + authedRequest(`${BASE}/sources`, { + method: 'POST', + body: jsonBody({ + type: 'ical', + url: 'https://b.com/cal.ics', + name: 'B', + regionId: region2.id, + }), + user: OTHER_USER_ID, + }) + ); + + const res = await app.fetch(authedRequest(`${BASE}/sources`)); + const { sources } = await res.json(); + expect(sources).toHaveLength(1); + expect(sources[0].name).toBe('A'); + }); + + it('deletes a source', async () => { + const region = await createRegion(); + const createRes = await app.fetch( + authedRequest(`${BASE}/sources`, { + method: 'POST', + body: jsonBody({ + type: 'ical', + url: 'https://x.com/cal.ics', + name: 'X', + regionId: region.id, + }), + }) + ); + const { source } = await createRes.json(); + + const delRes = await app.fetch( + authedRequest(`${BASE}/sources/${source.id}`, { method: 'DELETE' }) + ); + expect(delRes.status).toBe(200); + }); +}); + +// ─── Feed ─────────────────────────────────────────────────────────── + +describe('Discovery Feed', () => { + it('returns empty feed when no sources exist', async () => { + await createRegion(); + const res = await app.fetch(authedRequest(`${BASE}/feed`)); + expect(res.status).toBe(200); + const { events, hasMore } = await res.json(); + expect(events).toHaveLength(0); + expect(hasMore).toBe(false); + }); + + it('records save action', async () => { + const region = await createRegion(); + + // Insert a discovered event directly to test the action endpoint + const { sql: rawSql } = await import('drizzle-orm'); + const futureDate = new Date(Date.now() + 7 * 86400000).toISOString(); + + // First create a source so we have a source_id for the FK + const srcRes = await app.fetch( + authedRequest(`${BASE}/sources`, { + method: 'POST', + body: 
jsonBody({ + type: 'ical', + url: 'https://test.com/cal.ics', + name: 'Test', + regionId: region.id, + }), + }) + ); + const { source } = await srcRes.json(); + + // Insert event directly into DB + await app.db.execute( + rawSql`INSERT INTO event_discovery.discovered_events + (id, source_id, dedupe_hash, title, start_at, source_url, crawled_at) + VALUES ( + '00000000-0000-0000-0000-000000000001', + ${source.id}::uuid, + 'testhash123', + 'Test Event', + ${futureDate}::timestamptz, + 'https://example.com/event', + now() + )` + ); + + // Record action + const actionRes = await app.fetch( + authedRequest(`${BASE}/feed/00000000-0000-0000-0000-000000000001/action`, { + method: 'POST', + body: jsonBody({ action: 'save' }), + }) + ); + expect(actionRes.status).toBe(200); + + // Verify the action shows in feed + const feedRes = await app.fetch(authedRequest(`${BASE}/feed`)); + const { events } = await feedRes.json(); + expect(events).toHaveLength(1); + expect(events[0].userAction).toBe('save'); + }); + + it('dismiss action + hideDismissed filters events', async () => { + const region = await createRegion(); + const { sql: rawSql } = await import('drizzle-orm'); + const futureDate = new Date(Date.now() + 7 * 86400000).toISOString(); + + const srcRes = await app.fetch( + authedRequest(`${BASE}/sources`, { + method: 'POST', + body: jsonBody({ + type: 'ical', + url: 'https://test.com/cal.ics', + name: 'Test', + regionId: region.id, + }), + }) + ); + const { source } = await srcRes.json(); + + await app.db.execute( + rawSql`INSERT INTO event_discovery.discovered_events + (id, source_id, dedupe_hash, title, start_at, source_url, crawled_at) + VALUES ( + '00000000-0000-0000-0000-000000000002', + ${source.id}::uuid, + 'hash-dismiss', + 'Dismissed Event', + ${futureDate}::timestamptz, + 'https://example.com/ev2', + now() + )` + ); + + // Dismiss + await app.fetch( + authedRequest(`${BASE}/feed/00000000-0000-0000-0000-000000000002/action`, { + method: 'POST', + body: jsonBody({ 
action: 'dismiss' }), + }) + ); + + // Without hideDismissed — shows up + const res1 = await app.fetch(authedRequest(`${BASE}/feed`)); + const data1 = await res1.json(); + expect(data1.events).toHaveLength(1); + expect(data1.events[0].userAction).toBe('dismiss'); + + // With hideDismissed — filtered out + const res2 = await app.fetch(authedRequest(`${BASE}/feed?hideDismissed=true`)); + const data2 = await res2.json(); + expect(data2.events).toHaveLength(0); + }); + + it('rejects unauthenticated requests', async () => { + const res = await app.fetch(new Request(`${BASE}/feed`)); + expect(res.status).toBe(401); + }); +}); diff --git a/services/mana-events/src/__tests__/helpers.ts b/services/mana-events/src/__tests__/helpers.ts index 2461887d1..549dfb8fa 100644 --- a/services/mana-events/src/__tests__/helpers.ts +++ b/services/mana-events/src/__tests__/helpers.ts @@ -40,6 +40,8 @@ const TEST_CONFIG: Config = { rsvpPerTokenPerHour: 5, rsvpMaxPerToken: 20, }, + manaResearchUrl: 'http://localhost:3068', + manaLlmUrl: 'http://localhost:3025', }; /** @@ -71,6 +73,12 @@ export function buildTestApp(overrides: Partial = {}): TestApp { async wipe() { // Cascade FK from events_published handles public_rsvps + rate buckets await db.execute(sql`DELETE FROM events.events_published`); + // Discovery tables — cascade handles discovered_events + user_actions + await db.execute(sql`DELETE FROM event_discovery.discovery_user_actions`); + await db.execute(sql`DELETE FROM event_discovery.discovered_events`); + await db.execute(sql`DELETE FROM event_discovery.discovery_sources`); + await db.execute(sql`DELETE FROM event_discovery.discovery_interests`); + await db.execute(sql`DELETE FROM event_discovery.discovery_regions`); }, }; } diff --git a/services/mana-events/src/__tests__/ical-parser.test.ts b/services/mana-events/src/__tests__/ical-parser.test.ts new file mode 100644 index 000000000..5329adcb4 --- /dev/null +++ b/services/mana-events/src/__tests__/ical-parser.test.ts @@ -0,0 
+1,188 @@ +/** + * iCal parser unit tests — no DB or network required. + * Uses parseIcalText directly with inline iCal strings. + */ + +import { describe, it, expect } from 'bun:test'; +import { parseIcalText } from '../discovery/ical-parser'; + +const SOURCE_URL = 'https://example.com/events.ics'; +const SOURCE_NAME = 'Test Venue'; + +/** Helper: generate a date N days from now as iCal UTC string (YYYYMMDDTHHMMSSZ). */ +function futureIcalDate(daysAhead: number, hour = 19): string { + const d = new Date(Date.now() + daysAhead * 86400000); + d.setUTCHours(hour, 0, 0, 0); + const pad = (n: number) => n.toString().padStart(2, '0'); + return `${d.getUTCFullYear()}${pad(d.getUTCMonth() + 1)}${pad(d.getUTCDate())}T${pad(d.getUTCHours())}${pad(d.getUTCMinutes())}${pad(d.getUTCSeconds())}Z`; +} + +function pastIcalDate(daysAgo: number, hour = 19): string { + return futureIcalDate(-daysAgo, hour); +} + +function makeIcal(vevents: string): string { + return `BEGIN:VCALENDAR +VERSION:2.0 +PRODID:-//Test//Test//EN +${vevents} +END:VCALENDAR`; +} + +// ─── Basic parsing ────────────────────────────────────────────────── + +describe('parseIcalText', () => { + it('extracts a single future event', () => { + const ical = makeIcal(`BEGIN:VEVENT +UID:test-uid-1@example.com +DTSTART:${futureIcalDate(3)} +DTEND:${futureIcalDate(3, 21)} +SUMMARY:Jazz Night +DESCRIPTION:Live jazz at the club. 
+LOCATION:Jazzhaus Freiburg +URL:https://jazzhaus.de/event/1 +END:VEVENT`); + + const events = parseIcalText(ical, SOURCE_URL, SOURCE_NAME); + expect(events).toHaveLength(1); + expect(events[0].title).toBe('Jazz Night'); + expect(events[0].description).toBe('Live jazz at the club.'); + expect(events[0].location).toBe('Jazzhaus Freiburg'); + expect(events[0].externalId).toBe('test-uid-1@example.com'); + expect(events[0].sourceUrl).toBe('https://jazzhaus.de/event/1'); + expect(events[0].allDay).toBe(false); + }); + + it('extracts multiple events', () => { + const ical = makeIcal(`BEGIN:VEVENT +UID:a@test +DTSTART:${futureIcalDate(1)} +SUMMARY:Event A +END:VEVENT +BEGIN:VEVENT +UID:b@test +DTSTART:${futureIcalDate(2)} +SUMMARY:Event B +END:VEVENT +BEGIN:VEVENT +UID:c@test +DTSTART:${futureIcalDate(5)} +SUMMARY:Event C +END:VEVENT`); + + const events = parseIcalText(ical, SOURCE_URL, SOURCE_NAME); + expect(events).toHaveLength(3); + expect(events.map((e) => e.title).sort()).toEqual(['Event A', 'Event B', 'Event C']); + }); + + // ─── Filtering ────────────────────────────────────────────── + + it('filters out past events (> 1 day ago)', () => { + const ical = makeIcal(`BEGIN:VEVENT +UID:past@test +DTSTART:${pastIcalDate(5)} +DTEND:${pastIcalDate(5, 21)} +SUMMARY:Past Event +END:VEVENT +BEGIN:VEVENT +UID:future@test +DTSTART:${futureIcalDate(3)} +SUMMARY:Future Event +END:VEVENT`); + + const events = parseIcalText(ical, SOURCE_URL, SOURCE_NAME); + expect(events).toHaveLength(1); + expect(events[0].title).toBe('Future Event'); + }); + + it('skips events without a summary', () => { + const ical = makeIcal(`BEGIN:VEVENT +UID:no-title@test +DTSTART:${futureIcalDate(2)} +DESCRIPTION:Has no title +END:VEVENT`); + + const events = parseIcalText(ical, SOURCE_URL, SOURCE_NAME); + expect(events).toHaveLength(0); + }); + + it('skips VTODO and VFREEBUSY components', () => { + const ical = makeIcal(`BEGIN:VTODO +UID:todo@test +SUMMARY:Not an event +END:VTODO +BEGIN:VEVENT 
+UID:real@test +DTSTART:${futureIcalDate(1)} +SUMMARY:Real Event +END:VEVENT`); + + const events = parseIcalText(ical, SOURCE_URL, SOURCE_NAME); + expect(events).toHaveLength(1); + expect(events[0].title).toBe('Real Event'); + }); + + // ─── All-day ──────────────────────────────────────────────── + + it('detects all-day events (DATE type without time)', () => { + const d = new Date(Date.now() + 3 * 86400000); + const pad = (n: number) => n.toString().padStart(2, '0'); + const dateStr = `${d.getUTCFullYear()}${pad(d.getUTCMonth() + 1)}${pad(d.getUTCDate())}`; + + const ical = makeIcal(`BEGIN:VEVENT +UID:allday@test +DTSTART;VALUE=DATE:${dateStr} +SUMMARY:All Day Festival +END:VEVENT`); + + const events = parseIcalText(ical, SOURCE_URL, SOURCE_NAME); + expect(events).toHaveLength(1); + expect(events[0].allDay).toBe(true); + }); + + // ─── Fallbacks ────────────────────────────────────────────── + + it('uses sourceUrl when event has no URL property', () => { + const ical = makeIcal(`BEGIN:VEVENT +UID:no-url@test +DTSTART:${futureIcalDate(2)} +SUMMARY:No URL Event +END:VEVENT`); + + const events = parseIcalText(ical, SOURCE_URL, SOURCE_NAME); + expect(events[0].sourceUrl).toBe(SOURCE_URL); + }); + + it('truncates long descriptions to 2000 chars', () => { + const longDesc = 'A'.repeat(3000); + const ical = makeIcal(`BEGIN:VEVENT +UID:long@test +DTSTART:${futureIcalDate(2)} +SUMMARY:Long Desc +DESCRIPTION:${longDesc} +END:VEVENT`); + + const events = parseIcalText(ical, SOURCE_URL, SOURCE_NAME); + expect(events[0].description!.length).toBe(2000); + }); + + it('handles empty calendar gracefully', () => { + const ical = makeIcal(''); + const events = parseIcalText(ical, SOURCE_URL, SOURCE_NAME); + expect(events).toHaveLength(0); + }); + + it('handles optional fields as null', () => { + const ical = makeIcal(`BEGIN:VEVENT +UID:minimal@test +DTSTART:${futureIcalDate(1)} +SUMMARY:Minimal Event +END:VEVENT`); + + const events = parseIcalText(ical, SOURCE_URL, SOURCE_NAME); + 
expect(events[0].description).toBeNull();
+		expect(events[0].location).toBeNull();
+		// node-ical may synthesize an end date from start when DTEND is missing
+		expect(events[0].startAt).toBeInstanceOf(Date);
+	});
+});
diff --git a/services/mana-events/src/__tests__/ical-real-feeds.test.ts b/services/mana-events/src/__tests__/ical-real-feeds.test.ts
new file mode 100644
index 000000000..ccb1ed181
--- /dev/null
+++ b/services/mana-events/src/__tests__/ical-real-feeds.test.ts
@@ -0,0 +1,133 @@
+/**
+ * Real-world iCal feed tests — validates that parseIcalFeed works with
+ * actual public iCal feeds from the internet.
+ *
+ * These tests hit the network and may be slow or flaky. They exist to
+ * catch compatibility issues with real-world iCal quirks that synthetic
+ * test data can't cover.
+ */
+
+import { describe, it, expect } from 'bun:test';
+import { parseIcalFeed, parseIcalText } from '../discovery/ical-parser';
+
+const TIMEOUT = 20_000;
+
+describe('Real iCal feeds', () => {
+	it(
+		'parses Mozilla Thunderbird sample calendar',
+		async () => {
+			// Mozilla ships a public demo calendar for Thunderbird / Lightning
+			let events: Awaited<ReturnType<typeof parseIcalFeed>>;
+			try {
+				events = await parseIcalFeed(
+					'https://www.mozilla.org/media/caldata/GermanHolidays.ics',
+					'German Holidays'
+				);
+			} catch (err) {
+				// An unreachable feed is acceptable in CI. Only the fetch+parse call is
+				// guarded; the assertions below run outside the try so they can fail.
+				console.warn('[ical-real-feeds] skipping, feed unavailable:', err);
+				return;
+			}
+			// May or may not have future events, but should parse without error
+			expect(Array.isArray(events)).toBe(true);
+			for (const e of events) {
+				expect(e.title).toBeTruthy();
+				expect(e.startAt).toBeInstanceOf(Date);
+			}
+		},
+		TIMEOUT
+	);
+
+	it('parses a realistic multi-event iCal with timezones', () => {
+		// A realistic iCal string with VTIMEZONE, multiple VEVENTs,
+		// different date formats, and edge cases.
+ const futureYear = new Date().getFullYear() + 1; + const ical = `BEGIN:VCALENDAR +VERSION:2.0 +PRODID:-//Jazzhaus//Events//DE +X-WR-CALNAME:Jazzhaus Freiburg +BEGIN:VTIMEZONE +TZID:Europe/Berlin +BEGIN:DAYLIGHT +TZOFFSETFROM:+0100 +TZOFFSETTO:+0200 +TZNAME:CEST +DTSTART:19700329T020000 +RRULE:FREQ=YEARLY;BYDAY=-1SU;BYMONTH=3 +END:DAYLIGHT +BEGIN:STANDARD +TZOFFSETFROM:+0200 +TZOFFSETTO:+0100 +TZNAME:CET +DTSTART:19701025T030000 +RRULE:FREQ=YEARLY;BYDAY=-1SU;BYMONTH=10 +END:STANDARD +END:VTIMEZONE +BEGIN:VEVENT +UID:ev1@jazzhaus.de +DTSTART;TZID=Europe/Berlin:${futureYear}0515T200000 +DTEND;TZID=Europe/Berlin:${futureYear}0515T230000 +SUMMARY:Gregory Porter Live +DESCRIPTION:Grammy-winning jazz vocalist performs his greatest hits.\\nSpecial guest: Lizz Wright. +LOCATION:Jazzhaus Freiburg\\, Schnewlinstr. 1 +URL:https://jazzhaus.de/events/gregory-porter +CATEGORIES:MUSIC,JAZZ +END:VEVENT +BEGIN:VEVENT +UID:ev2@jazzhaus.de +DTSTART;VALUE=DATE:${futureYear}0620 +DTEND;VALUE=DATE:${futureYear}0622 +SUMMARY:Freiburg Jazz Festival +DESCRIPTION:Three days of jazz across multiple venues. 
+LOCATION:Freiburg Altstadt +END:VEVENT +BEGIN:VEVENT +UID:ev3@jazzhaus.de +DTSTART;TZID=Europe/Berlin:${futureYear}0701T190000 +SUMMARY:Open Mic Night +LOCATION:Jazzhaus Freiburg +END:VEVENT +END:VCALENDAR`; + + const events = parseIcalText(ical, 'https://jazzhaus.de/events.ics', 'Jazzhaus'); + expect(events).toHaveLength(3); + + // Check the timezone-aware event + const porter = events.find((e) => e.title === 'Gregory Porter Live')!; + expect(porter).toBeTruthy(); + expect(porter.description).toContain('Grammy-winning'); + expect(porter.location).toContain('Jazzhaus Freiburg'); + expect(porter.sourceUrl).toBe('https://jazzhaus.de/events/gregory-porter'); + expect(porter.allDay).toBe(false); + + // Check all-day event + const festival = events.find((e) => e.title === 'Freiburg Jazz Festival')!; + expect(festival).toBeTruthy(); + expect(festival.allDay).toBe(true); + + // Check minimal event + const openMic = events.find((e) => e.title === 'Open Mic Night')!; + expect(openMic).toBeTruthy(); + expect(openMic.description).toBeNull(); + }); + + it('handles escaped characters in iCal text', () => { + const futureYear = new Date().getFullYear() + 1; + const ical = `BEGIN:VCALENDAR +VERSION:2.0 +BEGIN:VEVENT +UID:escaped@test +DTSTART:${futureYear}0301T190000Z +SUMMARY:Konzert: Rock & Blues\\, feat. "The Band" +DESCRIPTION:Ein Abend voller Musik.\\nMit Special Guests.\\n\\nEintritt: 15\\,00 EUR +LOCATION:E-Werk\\, Eschholzstr. 77\\, 79106 Freiburg +END:VEVENT +END:VCALENDAR`; + + const events = parseIcalText(ical, 'https://test.com', 'Test'); + expect(events).toHaveLength(1); + expect(events[0].title).toContain('Rock & Blues'); + expect(events[0].location).toContain('E-Werk'); + }); +}); diff --git a/services/mana-events/src/__tests__/scorer.test.ts b/services/mana-events/src/__tests__/scorer.test.ts new file mode 100644 index 000000000..6adf18bfb --- /dev/null +++ b/services/mana-events/src/__tests__/scorer.test.ts @@ -0,0 +1,133 @@ +/** + * Relevance scorer unit tests. 
+ */ + +import { describe, it, expect } from 'bun:test'; +import { scoreEvent, type ScoredEventInput, type ScoringContext } from '../discovery/scorer'; + +function makeEvent(overrides: Partial = {}): ScoredEventInput { + return { + title: 'Jazz Night', + category: 'music', + lat: 47.997, + lon: 7.842, + startAt: new Date(Date.now() + 3 * 86400000), // 3 days from now + ...overrides, + }; +} + +function makeContext(overrides: Partial = {}): ScoringContext { + return { + interests: [ + { category: 'music', freetext: null, weight: 1.0 }, + { category: 'tech', freetext: 'Rust', weight: 2.0 }, + ], + regions: [{ lat: 47.997, lon: 7.842, radiusKm: 25 }], + ...overrides, + }; +} + +describe('scoreEvent', () => { + it('returns base score (50) for event with no matches', () => { + const score = scoreEvent( + makeEvent({ category: 'other', title: 'Nothing', lat: null, lon: null }), + makeContext({ interests: [] }) + ); + // Base 50 + time proximity bonus (within 7 days) = ~60 + expect(score).toBeGreaterThanOrEqual(50); + expect(score).toBeLessThanOrEqual(65); + }); + + it('boosts score for category match', () => { + const withMatch = scoreEvent(makeEvent({ category: 'music' }), makeContext()); + const noMatch = scoreEvent(makeEvent({ category: 'other' }), makeContext()); + expect(withMatch).toBeGreaterThan(noMatch); + }); + + it('boosts score for freetext match in title', () => { + // Use no other matching interests to avoid hitting the 100 cap + const ctx = makeContext({ interests: [{ category: 'other', freetext: 'Rust', weight: 1.0 }] }); + const withMatch = scoreEvent( + makeEvent({ title: 'Rust Meetup Freiburg', category: 'sport' }), + ctx + ); + const noMatch = scoreEvent(makeEvent({ title: 'Python Meetup', category: 'sport' }), ctx); + expect(withMatch).toBeGreaterThan(noMatch); + }); + + it('applies interest weight', () => { + const highWeight = scoreEvent( + makeEvent({ title: 'Rust Talk', category: 'tech' }), + makeContext({ interests: [{ category: 'tech', freetext: 
'Rust', weight: 3.0 }] }) + ); + const lowWeight = scoreEvent( + makeEvent({ title: 'Rust Talk', category: 'tech' }), + makeContext({ interests: [{ category: 'tech', freetext: 'Rust', weight: 0.5 }] }) + ); + expect(highWeight).toBeGreaterThan(lowWeight); + }); + + it('penalizes distant events', () => { + const near = scoreEvent( + makeEvent({ lat: 47.997, lon: 7.842 }), // same as region center + makeContext() + ); + const far = scoreEvent( + makeEvent({ lat: 48.5, lon: 8.5 }), // ~60km away + makeContext() + ); + expect(near).toBeGreaterThan(far); + }); + + it('boosts events within 7 days more than 14 days', () => { + // Use minimal context to avoid hitting the 100 cap + const ctx = makeContext({ interests: [] }); + const soon = scoreEvent( + makeEvent({ startAt: new Date(Date.now() + 3 * 86400000), category: null }), + ctx + ); + const later = scoreEvent( + makeEvent({ startAt: new Date(Date.now() + 10 * 86400000), category: null }), + ctx + ); + const farOut = scoreEvent( + makeEvent({ startAt: new Date(Date.now() + 30 * 86400000), category: null }), + ctx + ); + expect(soon).toBeGreaterThan(later); + expect(later).toBeGreaterThanOrEqual(farOut); + }); + + it('gives weekend bonus', () => { + // Find the next Saturday + const now = new Date(); + const daysUntilSat = (6 - now.getDay() + 7) % 7 || 7; + const saturday = new Date(now.getTime() + daysUntilSat * 86400000); + const monday = new Date(saturday.getTime() + 2 * 86400000); + + const weekend = scoreEvent(makeEvent({ startAt: saturday }), makeContext()); + const weekday = scoreEvent(makeEvent({ startAt: monday }), makeContext()); + expect(weekend).toBeGreaterThanOrEqual(weekday); + }); + + it('clamps score to 0-100 range', () => { + // Lots of matching interests should not exceed 100 + const manyInterests = Array.from({ length: 10 }, (_, i) => ({ + category: 'music', + freetext: 'jazz', + weight: 3.0, + })); + const score = scoreEvent( + makeEvent({ title: 'jazz night', category: 'music' }), + makeContext({ 
interests: manyInterests })
+		);
+		expect(score).toBeLessThanOrEqual(100);
+		expect(score).toBeGreaterThanOrEqual(0);
+	});
+
+	it('handles missing coordinates gracefully', () => {
+		const score = scoreEvent(makeEvent({ lat: null, lon: null }), makeContext());
+		// Should not crash, just skip distance penalty
+		expect(score).toBeGreaterThan(0);
+	});
+});
diff --git a/services/mana-events/src/__tests__/website-extractor.test.ts b/services/mana-events/src/__tests__/website-extractor.test.ts
new file mode 100644
index 000000000..9312693d7
--- /dev/null
+++ b/services/mana-events/src/__tests__/website-extractor.test.ts
@@ -0,0 +1,147 @@
+/**
+ * Website extractor unit tests — tests the JSON parsing and date handling
+ * without hitting real LLM or mana-research services.
+ */
+
+import { describe, it, expect } from 'bun:test';
+import { parseExtractedEvents } from '../discovery/website-extractor';
+
+const SOURCE_URL = 'https://jazzhaus.de/programm';
+const SOURCE_NAME = 'Jazzhaus Freiburg';
+
+describe('parseExtractedEvents', () => {
+	it('parses a well-formed JSON response', () => {
+		const futureYear = new Date().getFullYear() + 1;
+		const json = JSON.stringify({
+			events: [
+				{
+					title: 'Jazz Night',
+					date: `${futureYear}-05-15`,
+					time: '20:00',
+					location: 'Jazzhaus Freiburg',
+					category: 'music',
+					priceInfo: '15 EUR',
+				},
+				{
+					title: 'Rock Festival',
+					date: `${futureYear}-06-20`,
+					location: 'Stadtpark',
+					category: 'music',
+				},
+			],
+		});
+
+		const events = parseExtractedEvents(json, SOURCE_URL, SOURCE_NAME);
+		expect(events).toHaveLength(2);
+		expect(events[0].title).toBe('Jazz Night');
+		expect(events[0].location).toBe('Jazzhaus Freiburg');
+		expect(events[0].category).toBe('music');
+		expect(events[0].priceInfo).toBe('15 EUR');
+		expect(events[0].allDay).toBe(false); // has time
+		expect(events[1].title).toBe('Rock Festival');
+		expect(events[1].allDay).toBe(true); // no time
+	});
+
+	it('handles markdown-fenced JSON', () => {
+		const futureYear = new Date().getFullYear() + 1;
+		const json = `\`\`\`json
+{
+  "events": [
+    {"title": "Test", "date": "${futureYear}-03-01", "time": "19:00"}
+  ]
+}
+\`\`\``;
+
+		const events = parseExtractedEvents(json, SOURCE_URL, SOURCE_NAME);
+		expect(events).toHaveLength(1);
+		expect(events[0].title).toBe('Test');
+	});
+
+	it('parses German date format (DD.MM.YYYY)', () => {
+		const futureYear = new Date().getFullYear() + 1;
+		const json = JSON.stringify({
+			events: [{ title: 'Fest', date: `15.06.${futureYear}`, time: '18:00' }],
+		});
+
+		const events = parseExtractedEvents(json, SOURCE_URL, SOURCE_NAME);
+		expect(events).toHaveLength(1);
+		expect(events[0].startAt.getFullYear()).toBe(futureYear);
+		expect(events[0].startAt.getMonth()).toBe(5); // June = 5
+	});
+
+	it('filters out past events', () => {
+		// Derive the future date from the clock: a fixed year would itself become "past"
+		const futureYear = new Date().getFullYear() + 1;
+		const json = JSON.stringify({
+			events: [
+				{ title: 'Past Event', date: '2020-01-01', time: '19:00' },
+				{ title: 'Future Event', date: `${futureYear}-01-01`, time: '19:00' },
+			],
+		});
+
+		const events = parseExtractedEvents(json, SOURCE_URL, SOURCE_NAME);
+		expect(events).toHaveLength(1);
+		expect(events[0].title).toBe('Future Event');
+	});
+
+	it('skips events without title or date', () => {
+		const futureYear = new Date().getFullYear() + 1;
+		const json = JSON.stringify({
+			events: [
+				{ title: 'Valid', date: `${futureYear}-01-01` },
+				{ title: '', date: `${futureYear}-01-02` },
+				{ title: 'No Date' },
+				{ date: `${futureYear}-01-03` },
+			],
+		});
+
+		const events = parseExtractedEvents(json, SOURCE_URL, SOURCE_NAME);
+		expect(events).toHaveLength(1);
+		expect(events[0].title).toBe('Valid');
+	});
+
+	it('truncates long titles and descriptions', () => {
+		const futureYear = new Date().getFullYear() + 1;
+		const json = JSON.stringify({
+			events: [
+				{
+					title: 'A'.repeat(300),
+					date: `${futureYear}-01-01`,
+					description: 'B'.repeat(3000),
+				},
+			],
+		});
+
+		const events = parseExtractedEvents(json, SOURCE_URL, SOURCE_NAME);
+		expect(events[0].title.length).toBe(200);
+		expect(events[0].description!.length).toBe(2000);
+	});
+
+	it('handles empty/invalid JSON gracefully', () => {
+		
expect(parseExtractedEvents('', SOURCE_URL, SOURCE_NAME)).toHaveLength(0); + expect(parseExtractedEvents('not json', SOURCE_URL, SOURCE_NAME)).toHaveLength(0); + expect(parseExtractedEvents('{}', SOURCE_URL, SOURCE_NAME)).toHaveLength(0); + expect(parseExtractedEvents('{"events": "not array"}', SOURCE_URL, SOURCE_NAME)).toHaveLength( + 0 + ); + }); + + it('handles endDate and endTime', () => { + const futureYear = new Date().getFullYear() + 1; + const json = JSON.stringify({ + events: [ + { + title: 'Festival', + date: `${futureYear}-07-01`, + time: '10:00', + endDate: `${futureYear}-07-03`, + endTime: '23:00', + }, + ], + }); + + const events = parseExtractedEvents(json, SOURCE_URL, SOURCE_NAME); + expect(events).toHaveLength(1); + expect(events[0].endAt).not.toBeNull(); + expect(events[0].endAt!.getDate()).toBe(3); + }); +}); diff --git a/services/mana-events/src/app.ts b/services/mana-events/src/app.ts index 87a7d47fc..571311433 100644 --- a/services/mana-events/src/app.ts +++ b/services/mana-events/src/app.ts @@ -12,6 +12,8 @@ import { jwtAuth } from './middleware/jwt-auth'; import { healthRoutes } from './routes/health'; import { createEventsRoutes } from './routes/events'; import { createRsvpRoutes } from './routes/rsvp'; +import { createDiscoveryRoutes } from './routes/discovery'; +import { createDiscoveryFeedRoutes } from './routes/discovery-feed'; /** * Build the Hono app. 
The auth middleware is injected so tests can swap @@ -42,5 +44,10 @@ export function createApp( app.use('/api/v1/events/*', authMiddleware); app.route('/api/v1/events', createEventsRoutes(db)); + // Discovery endpoints (all authenticated) + app.use('/api/v1/discovery/*', authMiddleware); + app.route('/api/v1/discovery', createDiscoveryRoutes(db, config)); + app.route('/api/v1/discovery', createDiscoveryFeedRoutes(db)); + return app; } diff --git a/services/mana-events/src/config.ts b/services/mana-events/src/config.ts index ffafdc6fd..12554bfb3 100644 --- a/services/mana-events/src/config.ts +++ b/services/mana-events/src/config.ts @@ -15,6 +15,9 @@ export interface Config { // Hard cap on total RSVPs per token rsvpMaxPerToken: number; }; + // Phase 2: external service URLs for event discovery + manaResearchUrl: string; + manaLlmUrl: string; } export function loadConfig(): Config { @@ -38,5 +41,7 @@ export function loadConfig(): Config { rsvpPerTokenPerHour: parseInt(process.env.RSVP_RATE_LIMIT || '60', 10), rsvpMaxPerToken: parseInt(process.env.RSVP_MAX_PER_TOKEN || '500', 10), }, + manaResearchUrl: process.env.MANA_RESEARCH_URL || 'http://localhost:3068', + manaLlmUrl: process.env.MANA_LLM_URL || 'http://localhost:3025', }; } diff --git a/services/mana-events/src/db/schema/discovery.ts b/services/mana-events/src/db/schema/discovery.ts new file mode 100644 index 000000000..4d6d85154 --- /dev/null +++ b/services/mana-events/src/db/schema/discovery.ts @@ -0,0 +1,144 @@ +/** + * Event Discovery schema — regions, interests, sources, discovered events, + * and user actions (save/dismiss). + * + * All tables live in the `event_discovery` Postgres schema inside `mana_platform`. + * Data is server-authoritative (not local-first) — the client caches results + * but the server owns the crawl loop and deduplication. 
+ */ + +import { + pgSchema, + uuid, + integer, + text, + real, + timestamp, + boolean, + doublePrecision, + uniqueIndex, + index, + jsonb, +} from 'drizzle-orm/pg-core'; + +export const discoverySchema = pgSchema('event_discovery'); + +// ─── Regions ──────────────────────────────────────────────────────── + +/** Geographic areas the user wants to discover events in. */ +export const discoveryRegions = discoverySchema.table( + 'discovery_regions', + { + id: uuid('id').defaultRandom().primaryKey(), + userId: text('user_id').notNull(), + label: text('label').notNull(), + lat: doublePrecision('lat').notNull(), + lon: doublePrecision('lon').notNull(), + radiusKm: integer('radius_km').default(25).notNull(), + isActive: boolean('is_active').default(true).notNull(), + createdAt: timestamp('created_at', { withTimezone: true }).defaultNow().notNull(), + }, + (t) => ({ + userIdx: index('discovery_regions_user_idx').on(t.userId), + }) +); + +// ─── Interests ────────────────────────────────────────────────────── + +/** User interests for relevance scoring. */ +export const discoveryInterests = discoverySchema.table( + 'discovery_interests', + { + id: uuid('id').defaultRandom().primaryKey(), + userId: text('user_id').notNull(), + category: text('category').notNull(), + freetext: text('freetext'), + weight: real('weight').default(1.0).notNull(), + createdAt: timestamp('created_at', { withTimezone: true }).defaultNow().notNull(), + }, + (t) => ({ + userIdx: index('discovery_interests_user_idx').on(t.userId), + }) +); + +// ─── Sources ──────────────────────────────────────────────────────── + +/** Event sources that are periodically crawled (iCal feeds, websites, …). 
*/ +export const discoverySources = discoverySchema.table( + 'discovery_sources', + { + id: uuid('id').defaultRandom().primaryKey(), + userId: text('user_id').notNull(), + type: text('type').notNull(), // 'ical' | 'website' | 'api' | 'search_query' + url: text('url'), + name: text('name').notNull(), + regionId: uuid('region_id').references(() => discoveryRegions.id, { onDelete: 'cascade' }), + crawlIntervalHours: integer('crawl_interval_hours').default(24).notNull(), + lastCrawledAt: timestamp('last_crawled_at', { withTimezone: true }), + lastSuccessAt: timestamp('last_success_at', { withTimezone: true }), + errorCount: integer('error_count').default(0).notNull(), + lastError: text('last_error'), + isActive: boolean('is_active').default(true).notNull(), + createdAt: timestamp('created_at', { withTimezone: true }).defaultNow().notNull(), + updatedAt: timestamp('updated_at', { withTimezone: true }).defaultNow().notNull(), + }, + (t) => ({ + userActiveIdx: index('discovery_sources_user_active_idx').on(t.userId, t.isActive), + }) +); + +// ─── Discovered Events ────────────────────────────────────────────── + +/** Normalized events found by crawling sources. Deduplicated by hash. 
*/ +export const discoveredEvents = discoverySchema.table( + 'discovered_events', + { + id: uuid('id').defaultRandom().primaryKey(), + sourceId: uuid('source_id') + .notNull() + .references(() => discoverySources.id, { onDelete: 'cascade' }), + externalId: text('external_id'), + dedupeHash: text('dedupe_hash').notNull(), + title: text('title').notNull(), + description: text('description'), + location: text('location'), + lat: doublePrecision('lat'), + lon: doublePrecision('lon'), + startAt: timestamp('start_at', { withTimezone: true }).notNull(), + endAt: timestamp('end_at', { withTimezone: true }), + allDay: boolean('all_day').default(false).notNull(), + imageUrl: text('image_url'), + sourceUrl: text('source_url').notNull(), + sourceName: text('source_name'), + category: text('category'), + priceInfo: text('price_info'), + rawExtracted: jsonb('raw_extracted'), + crawledAt: timestamp('crawled_at', { withTimezone: true }).defaultNow().notNull(), + expiresAt: timestamp('expires_at', { withTimezone: true }), + }, + (t) => ({ + dedupeIdx: uniqueIndex('discovered_events_dedupe_idx').on(t.dedupeHash), + startIdx: index('discovered_events_start_idx').on(t.startAt), + sourceIdx: index('discovered_events_source_idx').on(t.sourceId), + }) +); + +// ─── User Actions ─────────────────────────────────────────────────── + +/** Tracks user interaction with discovered events (save, dismiss). 
*/ +export const discoveryUserActions = discoverySchema.table( + 'discovery_user_actions', + { + id: uuid('id').defaultRandom().primaryKey(), + userId: text('user_id').notNull(), + eventId: uuid('event_id') + .notNull() + .references(() => discoveredEvents.id, { onDelete: 'cascade' }), + action: text('action').notNull(), // 'save' | 'dismiss' + actedAt: timestamp('acted_at', { withTimezone: true }).defaultNow().notNull(), + }, + (t) => ({ + userEventUnique: uniqueIndex('discovery_user_actions_user_event_idx').on(t.userId, t.eventId), + userIdx: index('discovery_user_actions_user_idx').on(t.userId), + }) +); diff --git a/services/mana-events/src/db/schema/index.ts b/services/mana-events/src/db/schema/index.ts index 7981d6b64..fe83445cf 100644 --- a/services/mana-events/src/db/schema/index.ts +++ b/services/mana-events/src/db/schema/index.ts @@ -1 +1,2 @@ export * from './events'; +export * from './discovery'; diff --git a/services/mana-events/src/discovery/crawl-scheduler.ts b/services/mana-events/src/discovery/crawl-scheduler.ts new file mode 100644 index 000000000..7c751c7aa --- /dev/null +++ b/services/mana-events/src/discovery/crawl-scheduler.ts @@ -0,0 +1,279 @@ +/** + * Crawl scheduler — periodically processes due discovery sources. + * + * Runs on a configurable interval (default 15 min). For each source + * whose crawl interval has elapsed: + * 1. Fetch + parse (iCal for now, website extraction in Phase 2) + * 2. Deduplicate via hash + * 3. Upsert into discovered_events + * 4. Update source status (last_crawled_at, error_count) + * + * Also cleans up expired events (past events older than 1 day). 
+ */ + +import { and, eq, lt, or, isNull, sql } from 'drizzle-orm'; +import type { Database } from '../db/connection'; +import { discoverySources, discoveredEvents } from '../db/schema/discovery'; +import { parseIcalFeed } from './ical-parser'; +import { extractEventsFromWebsite } from './website-extractor'; +import { computeDedupeHash } from './deduplicator'; +import type { NormalizedEvent } from './types'; + +const MAX_ERROR_COUNT = 5; + +/** Find all sources due for a crawl. */ +async function getDueSources(db: Database) { + return db + .select() + .from(discoverySources) + .where( + and( + eq(discoverySources.isActive, true), + or( + isNull(discoverySources.lastCrawledAt), + sql`${discoverySources.lastCrawledAt} < now() - (${discoverySources.crawlIntervalHours} || ' hours')::interval` + ) + ) + ); +} + +/** External service URLs for Phase 2 website extraction. */ +interface CrawlConfig { + manaResearchUrl: string; + manaLlmUrl: string; +} + +/** Crawl a single source and return normalized events. */ +async function crawlSource( + source: typeof discoverySources.$inferSelect, + config?: CrawlConfig +): Promise<{ events: NormalizedEvent[]; error?: string }> { + try { + switch (source.type) { + case 'ical': { + if (!source.url) return { events: [], error: 'No URL configured' }; + const events = await parseIcalFeed(source.url, source.name); + return { events }; + } + case 'website': { + if (!source.url) return { events: [], error: 'No URL configured' }; + if (!config) + return { events: [], error: 'Missing research/LLM config for website extraction' }; + const events = await extractEventsFromWebsite( + source.url, + source.name, + config.manaResearchUrl, + config.manaLlmUrl + ); + return { events }; + } + default: + return { events: [], error: `Unsupported source type: ${source.type}` }; + } + } catch (err) { + const message = err instanceof Error ? 
err.message : 'Unknown error'; + return { events: [], error: message }; + } +} + +/** Upsert normalized events into discovered_events. */ +async function upsertEvents( + db: Database, + sourceId: string, + sourceName: string, + events: NormalizedEvent[] +): Promise { + let upserted = 0; + for (const event of events) { + const dedupeHash = await computeDedupeHash(event); + const expiresAt = new Date(event.startAt.getTime() + 24 * 60 * 60 * 1000); + + try { + await db + .insert(discoveredEvents) + .values({ + sourceId, + externalId: event.externalId ?? null, + dedupeHash, + title: event.title, + description: event.description ?? null, + location: event.location ?? null, + lat: event.lat ?? null, + lon: event.lon ?? null, + startAt: event.startAt, + endAt: event.endAt ?? null, + allDay: event.allDay ?? false, + imageUrl: event.imageUrl ?? null, + sourceUrl: event.sourceUrl, + sourceName, + category: event.category ?? null, + priceInfo: event.priceInfo ?? null, + expiresAt, + }) + .onConflictDoUpdate({ + target: discoveredEvents.dedupeHash, + set: { + title: event.title, + description: event.description ?? null, + location: event.location ?? null, + startAt: event.startAt, + endAt: event.endAt ?? null, + sourceUrl: event.sourceUrl, + category: event.category ?? null, + priceInfo: event.priceInfo ?? null, + crawledAt: new Date(), + }, + }); + upserted++; + } catch (err) { + // Log but don't fail the whole batch for one bad event + console.error(`[discovery] failed to upsert event "${event.title}":`, err); + } + } + return upserted; +} + +/** Process a single source: crawl, dedup, upsert, update status. 
/**
 * Run one tick of the crawl scheduler: process every due source, then delete
 * expired events.
 *
 * Never rejects. Failures are logged, and they are isolated per source so a
 * single source that throws (for example on a database error while updating
 * its status) does not prevent the remaining sources or the cleanup from running.
 */
export async function runCrawlTick(db: Database, config?: CrawlConfig): Promise<void> {
	try {
		const due = await getDueSources(db);
		for (const source of due) {
			try {
				await processSource(db, source, config);
			} catch (err) {
				console.error(
					`[discovery] failed to process source "${source.name}" (${source.id}):`,
					err
				);
			}
		}
	} catch (err) {
		console.error('[discovery] crawl tick failed:', err);
	}

	// Cleanup does not depend on the crawl, so it runs even if the crawl part failed.
	try {
		const expired = await cleanupExpiredEvents(db);
		if (expired > 0) {
			console.log(`[discovery] cleaned up ${expired} expired events`);
		}
	} catch (err) {
		console.error('[discovery] cleanup of expired events failed:', err);
	}
}
/**
 * Start the periodic crawl scheduler.
 *
 * The first tick fires 10 seconds after the call; subsequent ticks fire every
 * `intervalMs` (default 15 minutes). Sources are crawled one after another
 * inside a tick, so a tick can outlast the interval. In that case the next
 * tick is skipped rather than run concurrently, so the same source is never
 * crawled twice at once.
 *
 * @param db         Database handle passed to every tick.
 * @param config     Optional service URLs needed for website sources.
 * @param intervalMs Delay between ticks in milliseconds.
 * @returns A function that cancels both the boot timer and the interval.
 */
export function startCrawlScheduler(
	db: Database,
	config?: CrawlConfig,
	intervalMs = 15 * 60 * 1000
): () => void {
	let running = false;

	const tick = async (): Promise<void> => {
		if (running) {
			console.warn('[discovery] previous crawl tick still running — skipping this tick');
			return;
		}
		running = true;
		try {
			await runCrawlTick(db, config);
		} catch (err) {
			// Timers ignore the returned promise; never let a rejection go unhandled.
			console.error('[discovery] crawl tick failed:', err);
		} finally {
			running = false;
		}
	};

	// First run shortly after boot
	const bootTimer = setTimeout(tick, 10_000);
	const intervalTimer = setInterval(tick, intervalMs);

	return () => {
		clearTimeout(bootTimer);
		clearInterval(intervalTimer);
	};
}
/**
 * Compute the SHA-256 hex digest used to deduplicate discovered events.
 *
 * The hashed key is `title|date|location`, where title and location are
 * lowercased and trimmed, a missing location counts as the empty string, and
 * date is the UTC calendar day (YYYY-MM-DD) of `startAt`.
 *
 * @param event Event to fingerprint; `startAt` must be a valid Date
 *              (`toISOString` throws on an invalid one).
 * @returns 64-character lowercase hex string.
 */
export async function computeDedupeHash(event: NormalizedEvent): Promise<string> {
	const key = [
		event.title.toLowerCase().trim(),
		event.startAt.toISOString().slice(0, 10), // YYYY-MM-DD
		(event.location ?? '').toLowerCase().trim(),
	].join('|');

	const digest = await crypto.subtle.digest('SHA-256', new TextEncoder().encode(key));
	return Array.from(new Uint8Array(digest), (byte) => byte.toString(16).padStart(2, '0')).join('');
}
/**
 * Fetch an iCal feed and parse it into NormalizedEvents.
 *
 * `webcal://` URLs are rewritten to `https://` first: fetch() only supports
 * http(s), so a webcal URL would otherwise always fail.
 *
 * @param url        Feed URL (http, https or webcal).
 * @param sourceName Forwarded to the parser.
 * @returns Events that are not yet over (see parseIcalText).
 * @throws Error on a non-2xx response; fetch errors, including the abort
 *         after FETCH_TIMEOUT_MS, propagate to the caller.
 */
export async function parseIcalFeed(url: string, sourceName: string): Promise<NormalizedEvent[]> {
	const fetchUrl = url.replace(/^webcal:\/\//i, 'https://');
	const controller = new AbortController();
	const timeout = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS);

	try {
		const res = await fetch(fetchUrl, {
			signal: controller.signal,
			headers: { 'User-Agent': 'Mana-Events-Discovery/1.0' },
		});
		if (!res.ok) {
			throw new Error(`HTTP ${res.status} fetching ${url}`);
		}
		const text = await res.text();
		return parseIcalText(text, fetchUrl, sourceName);
	} finally {
		clearTimeout(timeout);
	}
}

/**
 * Read a text property from a parsed VEVENT.
 *
 * node-ical returns a plain string for simple properties, but an object of
 * the form `{ params, val }` when the property carries parameters
 * (e.g. `SUMMARY;LANGUAGE=de:...`). Returns the trimmed text, or null when
 * the value is neither.
 */
function readIcalText(value: unknown): string | null {
	if (typeof value === 'string') return value.trim();
	if (value && typeof value === 'object') {
		const inner = (value as { val?: unknown }).val;
		if (typeof inner === 'string') return inner.trim();
	}
	return null;
}

/**
 * Parse raw iCal text into NormalizedEvents.
 * Exported for testing without network.
 *
 * Skips components that are not VEVENTs, events without a valid start or
 * without a summary, and events whose end (or start, when there is no end)
 * lies more than one day in the past. Only each VEVENT's own start/end is
 * read here; recurrence rules are not expanded by this function.
 *
 * @param icalText   Raw .ics content.
 * @param sourceUrl  Used as the event URL when the VEVENT has none.
 * @param sourceName Currently unused by the parser.
 */
export function parseIcalText(
	icalText: string,
	sourceUrl: string,
	sourceName: string
): NormalizedEvent[] {
	const parsed = ical.sync.parseICS(icalText);
	// Allow 1 day grace for ongoing events
	const cutoffMs = Date.now() - 24 * 60 * 60 * 1000;
	const events: NormalizedEvent[] = [];

	for (const component of Object.values(parsed)) {
		if (!component || component.type !== 'VEVENT') continue;
		const vevent = component as VEvent;

		const startDate = vevent.start ? new Date(vevent.start as unknown as string) : null;
		if (!startDate || isNaN(startDate.getTime())) continue;

		const endDate = vevent.end ? new Date(vevent.end as unknown as string) : null;
		if ((endDate ?? startDate).getTime() < cutoffMs) continue;

		const summary = readIcalText(vevent.summary) ?? '';
		if (!summary) continue;

		const description = readIcalText(vevent.description);
		const location = readIcalText(vevent.location);

		// Detect all-day: either DTSTART is DATE (no time), or the start carries a dateOnly flag
		const allDay =
			vevent.datetype === 'date' ||
			(vevent.start &&
				typeof vevent.start === 'object' &&
				'dateOnly' in vevent.start &&
				(vevent.start as { dateOnly?: boolean }).dateOnly === true);

		// Use the VEVENT UID as external ID for dedup stability across re-crawls
		const uid = typeof vevent.uid === 'string' ? vevent.uid : null;

		// Prefer the event's own URL, fall back to the feed URL
		const eventUrl = typeof vevent.url === 'string' ? vevent.url : sourceUrl;

		events.push({
			title: summary,
			description: description ? description.slice(0, 2000) : null,
			location,
			startAt: startDate,
			endAt: endDate,
			allDay: !!allDay,
			sourceUrl: eventUrl,
			externalId: uid,
			category: null, // iCal doesn't have a standard category we can rely on
		});
	}

	return events;
}
/**
 * Compute a relevance score for a discovered event.
 *
 * Starts at 50 and applies:
 *  - +20 × weight for every interest whose category equals the event category
 *  - +15 × weight for every interest whose freetext occurs in the title
 *    (case-insensitive; freetext is trimmed and blank freetext is ignored,
 *    otherwise a single space would match almost every title)
 *  - −1 per km beyond 5 km from the nearest region centre (only when the
 *    event has coordinates and at least one region exists)
 *  - +10 when the event starts within 7 days, +5 within 14 days
 *  - +5 when the event starts on a Saturday or Sunday (process-local time)
 *
 * @returns Integer in 0–100, higher = more relevant.
 */
export function scoreEvent(event: ScoredEventInput, ctx: ScoringContext): number {
	let score = 50; // Base score

	// ── Category / freetext match ───────────────────────────────
	const title = event.title.toLowerCase(); // hoisted: invariant across interests
	for (const interest of ctx.interests) {
		if (event.category && event.category === interest.category) {
			score += 20 * interest.weight;
		}
		const needle = interest.freetext?.trim().toLowerCase();
		if (needle && title.includes(needle)) {
			score += 15 * interest.weight;
		}
	}

	// ── Distance ────────────────────────────────────────────────
	if (event.lat != null && event.lon != null && ctx.regions.length > 0) {
		const nearest = Math.min(
			...ctx.regions.map((r) => haversineKm(event.lat!, event.lon!, r.lat, r.lon))
		);
		// Penalty: -1 per km beyond 5km
		score -= Math.max(0, nearest - 5);
	}

	// ── Time proximity ──────────────────────────────────────────
	const daysUntil = (event.startAt.getTime() - Date.now()) / 86_400_000;
	if (daysUntil >= 0 && daysUntil <= 7) score += 10;
	else if (daysUntil > 7 && daysUntil <= 14) score += 5;

	// ── Weekend bonus ───────────────────────────────────────────
	const dow = event.startAt.getDay();
	if (dow === 0 || dow === 6) score += 5;

	return Math.max(0, Math.min(100, Math.round(score)));
}
/**
 * Great-circle distance in kilometres between two lat/lon points (degrees),
 * using the haversine formula with an Earth radius of 6371 km.
 */
function haversineKm(lat1: number, lon1: number, lat2: number, lon2: number): number {
	const R = 6371;
	const dLat = toRad(lat2 - lat1);
	const dLon = toRad(lon2 - lon1);
	const a =
		Math.sin(dLat / 2) ** 2 +
		Math.cos(toRad(lat1)) * Math.cos(toRad(lat2)) * Math.sin(dLon / 2) ** 2;
	// Floating-point rounding can push `a` marginally outside [0, 1] for
	// (near-)antipodal points, which would make sqrt(1 - a) NaN.
	const h = Math.min(1, Math.max(0, a));
	return R * 2 * Math.atan2(Math.sqrt(h), Math.sqrt(1 - h));
}

/** Convert degrees to radians. */
function toRad(deg: number): number {
	return (deg * Math.PI) / 180;
}
/**
 * Derive a human-readable source name from a search hit.
 *
 * Prefers the page title with a trailing generic section removed
 * ("Jazzhaus Freiburg - Programm" → "Jazzhaus Freiburg"). A plain hyphen only
 * counts as a separator when surrounded by whitespace, so compound words such
 * as "Kultur-Termine" are left intact; `|`, en dash and em dash separate with
 * or without spaces. The cleaned title is used when it is 4–99 characters
 * long; otherwise the hostname (without "www.") is returned, or the first 80
 * characters of `url` when it is not a parseable URL.
 */
function extractSourceName(url: string, title: string): string {
	if (title) {
		const cleaned = title
			.replace(
				/(?:\s+-\s+|\s*[|–—]\s*)(?:Startseite|Home|Events?|Veranstaltungen|Termine|Programm).*$/i,
				''
			)
			.trim();
		if (cleaned.length > 3 && cleaned.length < 100) return cleaned;
	}
	// Fallback: hostname
	try {
		return new URL(url).hostname.replace(/^www\./, '');
	} catch {
		return url.slice(0, 80);
	}
}
/**
 * Discover event sources for a region by searching the web.
 *
 * Runs all search queries for the region label concurrently, classifies each
 * hit and inserts relevant ones as sources with is_active=false so the user
 * can review and activate them. URLs the user already has as a source, and
 * URLs that occur more than once in the results, are inserted at most once.
 * `webcal://` URLs are stored as `https://`, because fetch() cannot load the
 * webcal scheme.
 *
 * @param db              Database handle.
 * @param regionId        Region to search for; must belong to `userId`.
 * @param userId          Owner of the region and of the suggested sources.
 * @param manaResearchUrl Base URL of the mana-research service.
 * @returns Counts of suggested sources, executed queries and search hits seen.
 * @throws Error('Region not found') when the region does not exist for this user.
 */
export async function discoverSourcesForRegion(
	db: Database,
	regionId: string,
	userId: string,
	manaResearchUrl: string
): Promise<DiscoverResult> {
	// Get the region
	const [region] = await db
		.select()
		.from(discoveryRegions)
		.where(and(eq(discoveryRegions.id, regionId), eq(discoveryRegions.userId, userId)))
		.limit(1);

	if (!region) throw new Error('Region not found');

	// Get existing source URLs to avoid duplicates
	const existingSources = await db
		.select({ url: discoverySources.url })
		.from(discoverySources)
		.where(eq(discoverySources.userId, userId));
	const existingUrls = new Set(existingSources.map((s) => s.url).filter(Boolean));

	const queries = buildSearchQueries(region.label);
	let totalResults = 0;
	let suggestedCount = 0;

	// All queries run concurrently; the query list is short and fixed.
	const searchResults = await Promise.all(
		queries.map((query) => searchWeb(manaResearchUrl, query))
	);

	for (const result of searchResults) {
		if (!result?.data?.results) continue;

		for (const hit of result.data.results) {
			totalResults++;
			// The search response is external data: skip hits without a usable URL.
			if (typeof hit?.url !== 'string' || !hit.url) continue;

			// Classify on the original URL so the webcal:// pattern still applies.
			const type = classifyUrl(hit.url, hit.title ?? '', hit.snippet);
			if (!type) continue;

			const url = hit.url.replace(/^webcal:\/\//i, 'https://');
			if (existingUrls.has(url)) continue;

			const name = extractSourceName(url, hit.title ?? '');

			try {
				await db.insert(discoverySources).values({
					userId,
					type,
					url,
					name,
					regionId,
					isActive: false, // suggested — user must activate
					crawlIntervalHours: type === 'ical' ? 24 : 48,
				});
				existingUrls.add(url);
				suggestedCount++;
			} catch (err) {
				// Best effort: one failed insert must not abort discovery, but it is
				// logged so real database errors are not mistaken for duplicates.
				console.warn(`[source-discoverer] could not insert suggested source ${url}:`, err);
			}
		}
	}

	return { suggestedCount, queries: queries.length, searchResults: totalResults };
}
/**
 * Run one web search via mana-research (POST /api/v1/search).
 *
 * Never throws: a non-2xx response, a network error, a timeout after
 * SEARCH_TIMEOUT_MS or an unparsable body is logged and yields null.
 *
 * @param manaResearchUrl Base URL of the service; trailing slashes are ignored.
 * @param query           Search query text.
 * @returns The response body parsed as JSON (not validated further), or null on failure.
 */
async function searchWeb(manaResearchUrl: string, query: string): Promise<SearchResponse | null> {
	// Avoid "//api/..." when the configured base URL ends with a slash.
	const endpoint = `${manaResearchUrl.replace(/\/+$/, '')}/api/v1/search`;
	try {
		const res = await fetch(endpoint, {
			method: 'POST',
			headers: { 'Content-Type': 'application/json' },
			body: JSON.stringify({ query }),
			signal: AbortSignal.timeout(SEARCH_TIMEOUT_MS),
		});
		if (!res.ok) {
			console.warn(`[source-discoverer] search failed ${res.status}: ${query}`);
			return null;
		}
		return (await res.json()) as SearchResponse;
	} catch (err) {
		console.warn(`[source-discoverer] search error for "${query}":`, err);
		return null;
	}
}
*/ +export const EVENT_CATEGORIES = [ + 'music', + 'theater', + 'art', + 'tech', + 'sport', + 'food', + 'family', + 'nature', + 'education', + 'community', + 'nightlife', + 'market', + 'other', +] as const; + +export type EventCategory = (typeof EVENT_CATEGORIES)[number]; diff --git a/services/mana-events/src/discovery/website-extractor.ts b/services/mana-events/src/discovery/website-extractor.ts new file mode 100644 index 000000000..f7101a80e --- /dev/null +++ b/services/mana-events/src/discovery/website-extractor.ts @@ -0,0 +1,236 @@ +/** + * Website Extractor — LLM-based event extraction from unstructured web pages. + * + * Pipeline: + * 1. Crawl the page via mana-research POST /api/v1/extract + * 2. Feed the extracted text to mana-llm with a structured output prompt + * 3. Parse the LLM response as NormalizedEvent[] + * + * Uses cheap/fast models (gemma3:4b or haiku) to keep costs low. + * Falls back gracefully on any failure — one bad page doesn't crash the batch. + */ + +import type { NormalizedEvent } from './types'; + +const EXTRACT_TIMEOUT_MS = 20_000; +const LLM_TIMEOUT_MS = 30_000; +const MAX_CONTENT_CHARS = 15_000; // Trim long pages to stay within context window + +interface ExtractResponse { + success: boolean; + data?: { + content: { + title?: string; + text?: string; + markdown?: string; + html?: string; + }; + }; +} + +interface ChatCompletionResponse { + choices: Array<{ + message: { + content: string; + }; + }>; +} + +/** + * Extract events from a website URL. + * + * 1. Fetches + renders the page via mana-research (Firecrawl/Jina/Readability) + * 2. Sends the text to mana-llm with a structured extraction prompt + * 3. 
/**
 * Fetch the extracted text of a page via mana-research (POST /api/v1/extract).
 *
 * Never throws: failures are logged and yield null.
 *
 * @param url             Page to extract.
 * @param manaResearchUrl Base URL of the service; trailing slashes are ignored.
 * @returns The page's markdown (or plain text when there is no markdown),
 *          cut to MAX_CONTENT_CHARS; null when the request fails, the service
 *          reports no content, or the content is shorter than 50 characters.
 */
async function fetchPageContent(url: string, manaResearchUrl: string): Promise<string | null> {
	// Avoid "//api/..." when the configured base URL ends with a slash.
	const endpoint = `${manaResearchUrl.replace(/\/+$/, '')}/api/v1/extract`;
	try {
		const res = await fetch(endpoint, {
			method: 'POST',
			headers: { 'Content-Type': 'application/json' },
			body: JSON.stringify({ url }),
			signal: AbortSignal.timeout(EXTRACT_TIMEOUT_MS),
		});

		if (!res.ok) {
			console.warn(`[website-extractor] extract failed ${res.status}: ${url}`);
			return null;
		}

		const data = (await res.json()) as ExtractResponse;
		if (!data.success || !data.data?.content) return null;

		// Prefer markdown over plain text. The html field is intentionally not used here.
		const text = data.data.content.markdown || data.data.content.text || '';
		if (text.length < 50) return null; // Too short to contain events

		// Trim to stay within LLM context window
		return text.slice(0, MAX_CONTENT_CHARS);
	} catch (err) {
		console.warn(`[website-extractor] fetch error for ${url}:`, err);
		return null;
	}
}
/**
 * Send page content to mana-llm (POST /v1/chat/completions) and parse the
 * structured answer into NormalizedEvents.
 *
 * Never throws: a non-2xx response, a timeout after LLM_TIMEOUT_MS, a network
 * error or an empty completion is logged and yields an empty list.
 *
 * @param pageContent Extracted page text, appended to the user message.
 * @param sourceUrl   Stored as `sourceUrl` on every extracted event.
 * @param sourceName  Forwarded to the parser.
 * @param manaLlmUrl  Base URL of the mana-llm service.
 */
async function llmExtractEvents(
	pageContent: string,
	sourceUrl: string,
	sourceName: string,
	manaLlmUrl: string
): Promise<NormalizedEvent[]> {
	try {
		const res = await fetch(`${manaLlmUrl}/v1/chat/completions`, {
			method: 'POST',
			headers: { 'Content-Type': 'application/json' },
			body: JSON.stringify({
				model: 'ollama/gemma3:4b',
				messages: [
					{ role: 'system', content: buildExtractionPrompt() },
					{ role: 'user', content: `Extrahiere Events von dieser Seite:\n\n${pageContent}` },
				],
				max_tokens: 2048,
				temperature: 0,
				response_format: { type: 'json_object' },
			}),
			signal: AbortSignal.timeout(LLM_TIMEOUT_MS),
		});

		if (!res.ok) {
			console.warn(`[website-extractor] LLM failed ${res.status}`);
			return [];
		}

		const completion = (await res.json()) as ChatCompletionResponse;
		const rawJson = completion.choices?.[0]?.message?.content ?? '';
		if (!rawJson.trim()) {
			// Report an empty answer as such instead of as a JSON parse error.
			console.warn(`[website-extractor] LLM returned no content for ${sourceUrl}`);
			return [];
		}

		return parseExtractedEvents(rawJson, sourceUrl, sourceName);
	} catch (err) {
		console.warn(`[website-extractor] LLM error:`, err);
		return [];
	}
}
/**
 * Parse and validate LLM JSON output into NormalizedEvents.
 *
 * Accepts either `{ "events": [...] }` or a bare array, optionally wrapped in
 * a markdown code fence. Entries are validated one by one, so a single
 * malformed entry (null, a non-object, a non-string date) is skipped instead
 * of discarding the whole batch.
 *
 * An entry is kept when it has a title and a date that parses to a start no
 * more than one day in the past. An end that cannot be parsed or that lies
 * before the start is dropped (endAt = null).
 *
 * @param rawJson    Raw completion text.
 * @param sourceUrl  Stored as `sourceUrl` on every event.
 * @param sourceName Currently unused by the parser.
 * @returns Validated events; an empty array when the text is not usable JSON.
 */
export function parseExtractedEvents(
	rawJson: string,
	sourceUrl: string,
	sourceName: string
): NormalizedEvent[] {
	let rawEvents: unknown;
	try {
		// Strip markdown fences if present
		const cleaned = rawJson.replace(/^```(?:json)?\s*\n?/m, '').replace(/\n?```\s*$/m, '');
		const parsed = JSON.parse(cleaned);
		rawEvents = parsed?.events ?? parsed;
	} catch (err) {
		console.warn(`[website-extractor] JSON parse error:`, err);
		return [];
	}

	if (!Array.isArray(rawEvents)) return [];

	const cutoffMs = Date.now() - 24 * 60 * 60 * 1000;
	const events: NormalizedEvent[] = [];

	for (const raw of rawEvents) {
		// LLM output is untrusted: entries may be null, numbers or strings.
		if (!raw || typeof raw !== 'object') continue;
		if (!raw.title || !raw.date) continue;

		const title = String(raw.title).trim().slice(0, 200);
		if (!title) continue;

		// Parse date — LLMs sometimes return "25. April 2026" instead of ISO
		const time = typeof raw.time === 'string' ? raw.time : undefined;
		const startAt = parseFlexibleDate(String(raw.date), time);
		if (!startAt || isNaN(startAt.getTime())) continue;

		// Skip past events
		if (startAt.getTime() < cutoffMs) continue;

		const endTime = typeof raw.endTime === 'string' ? raw.endTime : undefined;
		let endAt = raw.endDate ? parseFlexibleDate(String(raw.endDate), endTime) : null;
		if (endAt && (isNaN(endAt.getTime()) || endAt.getTime() < startAt.getTime())) {
			endAt = null;
		}

		const category =
			typeof raw.category === 'string' ? raw.category.trim().toLowerCase() || null : null;

		events.push({
			title,
			description: raw.description ? String(raw.description).trim().slice(0, 2000) : null,
			location: raw.location ? String(raw.location).trim() : null,
			startAt,
			endAt,
			allDay: !raw.time,
			sourceUrl,
			category,
			priceInfo: raw.priceInfo ? String(raw.priceInfo).trim() : null,
		});
	}

	return events;
}
/** Month number (1–12) keyed by the first three lowercased letters of German and English month names. */
const MONTH_BY_PREFIX: Record<string, number> = {
	jan: 1,
	feb: 2,
	mär: 3,
	mae: 3,
	mar: 3,
	apr: 4,
	mai: 5,
	may: 5,
	jun: 6,
	jul: 7,
	aug: 8,
	sep: 9,
	okt: 10,
	oct: 10,
	nov: 11,
	dez: 12,
	dec: 12,
};

/** Build a Date from date parts plus "HH:MM", interpreted in process-local time; null when invalid. */
function toLocalDate(year: string, month: string, day: string, time: string): Date | null {
	const date = new Date(`${year}-${month.padStart(2, '0')}-${day.padStart(2, '0')}T${time}:00`);
	return isNaN(date.getTime()) ? null : date;
}

/**
 * Parse a date string flexibly.
 *
 * Tried in order: ISO (`YYYY-MM-DD…`), numeric German (`D.M.YYYY`), day with a
 * German or English month name (`25. April 2026`, `3 Mär 2026`), and finally
 * whatever `new Date()` accepts. For the first three forms the time of day
 * comes from `timeStr` (midnight when absent or invalid) and the result is in
 * process-local time.
 *
 * @param dateStr Date text, typically produced by the LLM.
 * @param timeStr Optional time text containing `H:MM` or `HH:MM`.
 * @returns A valid Date, or null when nothing could be parsed. Never returns an Invalid Date.
 */
function parseFlexibleDate(dateStr: string, timeStr?: string): Date | null {
	if (!dateStr) return null;
	const text = String(dateStr).trim();
	const time = parseTime(timeStr);

	// ISO format first (YYYY-MM-DD)
	const iso = text.match(/^(\d{4})-(\d{2})-(\d{2})/);
	if (iso) return toLocalDate(iso[1]!, iso[2]!, iso[3]!, time);

	// Numeric German format (DD.MM.YYYY)
	const numeric = text.match(/(\d{1,2})\.(\d{1,2})\.(\d{4})/);
	if (numeric) return toLocalDate(numeric[3]!, numeric[2]!, numeric[1]!, time);

	// Day + month name + year, e.g. "25. April 2026" or "3 Mär 2026"
	const named = text.match(/(\d{1,2})\.?\s*([A-Za-zÄÖÜäöü]{3,})\.?\s+(\d{4})/);
	if (named) {
		const month = MONTH_BY_PREFIX[named[2]!.toLowerCase().slice(0, 3)];
		if (month) return toLocalDate(named[3]!, String(month), named[1]!, time);
	}

	// Fallback: let Date parse it (the constructor does not throw, it yields Invalid Date)
	const fallback = new Date(text);
	return isNaN(fallback.getTime()) ? null : fallback;
}

/**
 * Extract a time of day as zero-padded "HH:MM".
 * Returns "00:00" when `timeStr` is missing, has no `H:MM` part, or is out of range.
 */
function parseTime(timeStr?: string): string {
	if (!timeStr) return '00:00';
	const match = String(timeStr).match(/(\d{1,2}):(\d{2})/);
	if (!match) return '00:00';
	// Reject values like "25:99" that would turn the whole date invalid.
	if (Number(match[1]) > 23 || Number(match[2]) > 59) return '00:00';
	return `${match[1]!.padStart(2, '0')}:${match[2]}`;
}
/** Query parameters accepted by GET /feed. */
const feedQuerySchema = z.object({
	from: z.string().datetime().optional(),
	to: z.string().datetime().optional(),
	category: z.string().max(50).optional(),
	limit: z.coerce.number().int().min(1).max(100).optional(),
	offset: z.coerce.number().int().min(0).optional(),
	// z.coerce.boolean() maps every non-empty string — including "false" — to
	// true, so the flag is parsed explicitly: only "true" and "1" enable it.
	hideDismissed: z
		.string()
		.optional()
		.transform((value) => value === 'true' || value === '1'),
});

/** Body accepted by POST /feed/:eventId/action. */
const actionSchema = z.object({
	action: z.enum(['save', 'dismiss']),
});

/**
 * Build the discovery feed router.
 *
 * Routes (all read the authenticated user from the `user` context variable):
 *  - GET  /feed                  paginated discovered events
 *  - POST /feed/:eventId/action  save or dismiss a discovered event
 */
export function createDiscoveryFeedRoutes(db: Database) {
	const app = new Hono<{ Variables: { user: AuthUser } }>();

	/**
	 * GET /feed — paginated discovered events, ordered by start time.
	 *
	 * Joins with user actions to include save/dismiss status.
	 * Only shows events from the current user's active sources.
	 * Responds with `{ events, total, hasMore }`, where `total` is the number
	 * of events on this page.
	 */
	app.get('/feed', async (c) => {
		const user = c.get('user');
		const query = feedQuerySchema.safeParse(c.req.query());
		if (!query.success) throw new BadRequestError(query.error.issues[0]?.message ?? 'Invalid');

		const { from, to, category, hideDismissed } = query.data;
		const limit = query.data.limit ?? 20;
		const offset = query.data.offset ?? 0;

		// Build WHERE conditions
		const conditions = [
			// Only events from this user's active sources
			eq(discoverySources.userId, user.userId),
			eq(discoverySources.isActive, true),
			// Only events starting at or after `from` (defaults to now)
			gte(discoveredEvents.startAt, from ? new Date(from) : new Date()),
		];

		if (to) {
			conditions.push(lte(discoveredEvents.startAt, new Date(to)));
		}
		if (category) {
			conditions.push(eq(discoveredEvents.category, category));
		}
		if (hideDismissed) {
			// Filter in SQL, before LIMIT/OFFSET. Filtering the fetched page
			// afterwards yields short pages and a wrong hasMore. IS DISTINCT FROM
			// keeps rows with no action at all (NULL from the left join).
			conditions.push(sql`${discoveryUserActions.action} is distinct from 'dismiss'`);
		}

		// Query events with left join on user actions
		const rows = await db
			.select({
				id: discoveredEvents.id,
				title: discoveredEvents.title,
				description: discoveredEvents.description,
				location: discoveredEvents.location,
				lat: discoveredEvents.lat,
				lon: discoveredEvents.lon,
				startAt: discoveredEvents.startAt,
				endAt: discoveredEvents.endAt,
				allDay: discoveredEvents.allDay,
				imageUrl: discoveredEvents.imageUrl,
				sourceUrl: discoveredEvents.sourceUrl,
				sourceName: discoveredEvents.sourceName,
				category: discoveredEvents.category,
				priceInfo: discoveredEvents.priceInfo,
				crawledAt: discoveredEvents.crawledAt,
				userAction: discoveryUserActions.action,
			})
			.from(discoveredEvents)
			.innerJoin(discoverySources, eq(discoveredEvents.sourceId, discoverySources.id))
			.leftJoin(
				discoveryUserActions,
				and(
					eq(discoveryUserActions.eventId, discoveredEvents.id),
					eq(discoveryUserActions.userId, user.userId)
				)
			)
			.where(and(...conditions))
			// The id tie-breaker keeps offset pagination stable when events share a start time.
			.orderBy(asc(discoveredEvents.startAt), asc(discoveredEvents.id))
			.limit(limit + 1) // fetch one extra to determine hasMore
			.offset(offset);

		const hasMore = rows.length > limit;
		const events = rows.slice(0, limit);

		return c.json({ events, total: events.length, hasMore });
	});

	/**
	 * POST /feed/:eventId/action — save or dismiss a discovered event.
	 *
	 * Upserts on (userId, eventId), so repeating the call replaces the
	 * previous action and refreshes `actedAt`.
	 */
	app.post('/feed/:eventId/action', async (c) => {
		const user = c.get('user');
		const eventId = c.req.param('eventId');
		const body = await c.req.json().catch(() => null);
		const parsed = actionSchema.safeParse(body);
		if (!parsed.success) throw new BadRequestError(parsed.error.issues[0]?.message ?? 'Invalid');

		// NOTE(review): eventId is not checked against the user's own sources before
		// the insert — confirm whether an unknown id should yield a 404 here.
		await db
			.insert(discoveryUserActions)
			.values({
				userId: user.userId,
				eventId,
				action: parsed.data.action,
			})
			.onConflictDoUpdate({
				target: [discoveryUserActions.userId, discoveryUserActions.eventId],
				set: {
					action: parsed.data.action,
					actedAt: new Date(),
				},
			});

		return c.json({ ok: true });
	});

	return app;
}
+ */
+
+import { Hono } from 'hono';
+import { z } from 'zod';
+import { and, eq } from 'drizzle-orm';
+import type { Database } from '../db/connection';
+import { discoveryRegions, discoveryInterests, discoverySources } from '../db/schema/discovery';
+import { EVENT_CATEGORIES } from '../discovery/types';
+import { crawlSourceNow } from '../discovery/crawl-scheduler';
+import { discoverSourcesForRegion } from '../discovery/source-discoverer';
+import { BadRequestError, ForbiddenError, NotFoundError } from '../lib/errors';
+import type { AuthUser } from '../middleware/jwt-auth';
+import type { Config } from '../config';
+
+// ─── Validation schemas ─────────────────────────────────────────────
+
+/** Body of POST /regions. `radiusKm` is optional; the handler falls back to 25. */
+const regionCreateSchema = z.object({
+	label: z.string().min(1).max(200),
+	lat: z.number().min(-90).max(90),
+	lon: z.number().min(-180).max(180),
+	radiusKm: z.number().int().min(1).max(200).optional(),
+});
+
+/** Body of PUT /regions/:id. Every field is optional; coordinates cannot be changed here. */
+const regionUpdateSchema = z.object({
+	label: z.string().min(1).max(200).optional(),
+	radiusKm: z.number().int().min(1).max(200).optional(),
+	isActive: z.boolean().optional(),
+});
+
+/**
+ * Body of POST /interests. `weight` is optional; the handler falls back to 1.0.
+ *
+ * NOTE(review): `category` is only length-checked although EVENT_CATEGORIES is
+ * imported above — confirm whether it should be restricted to that list.
+ */
+const interestCreateSchema = z.object({
+	category: z.string().min(1).max(50),
+	freetext: z.string().max(200).nullable().optional(),
+	weight: z.number().min(0.1).max(5).optional(),
+});
+
+/** Body of POST /sources. `crawlIntervalHours` is optional; the handler falls back to 24. */
+const sourceCreateSchema = z.object({
+	type: z.enum(['ical', 'website']),
+	url: z.string().url().max(2000),
+	name: z.string().min(1).max(200),
+	regionId: z.string().uuid(),
+	crawlIntervalHours: z.number().int().min(1).max(168).optional(), // max 7 days
+});
+
+// ─── Helpers ────────────────────────────────────────────────────────
+
+/**
+ * Validate an already-decoded request body against a Zod schema.
+ *
+ * @param schema Zod schema describing the expected body.
+ * @param body   Decoded JSON body, or `null` when decoding failed.
+ * @returns The parsed, typed data.
+ * @throws BadRequestError with the first validation issue's message ('Invalid' if none).
+ */
+function parseBody<S extends z.ZodTypeAny>(schema: S, body: unknown): z.infer<S> {
+	const parsed = schema.safeParse(body);
+	if (!parsed.success) throw new BadRequestError(parsed.error.issues[0]?.message ?? 'Invalid');
+	return parsed.data;
+}
+
+// ─── Routes ─────────────────────────────────────────────────────────
+
+/**
+ * Build the Hono sub-app with the discovery CRUD routes.
+ *
+ * Every handler reads the authenticated user from the `user` context variable
+ * and only touches rows whose `userId` matches it.
+ *
+ * NOTE(review): `:id` path params go to the database unvalidated; if the id
+ * columns are UUID-typed, a malformed id would surface as a database error
+ * instead of a 404 — confirm against the schema.
+ *
+ * @param db     Drizzle database handle.
+ * @param config Optional service config. Without it, source discovery is
+ *               rejected and crawls run without research/LLM service URLs.
+ * @returns The configured Hono app.
+ */
+export function createDiscoveryRoutes(db: Database, config?: Config) {
+	const app = new Hono<{ Variables: { user: AuthUser } }>();
+
+	// Service URLs handed to the crawler; undefined when no config was supplied.
+	const crawlConfig = config
+		? { manaResearchUrl: config.manaResearchUrl, manaLlmUrl: config.manaLlmUrl }
+		: undefined;
+
+	// Ownership predicates. They are applied to the mutating statements as well
+	// as the lookups, so a write can never hit a row owned by someone else.
+	const ownedRegion = (id: string, userId: string) =>
+		and(eq(discoveryRegions.id, id), eq(discoveryRegions.userId, userId));
+	const ownedInterest = (id: string, userId: string) =>
+		and(eq(discoveryInterests.id, id), eq(discoveryInterests.userId, userId));
+	const ownedSource = (id: string, userId: string) =>
+		and(eq(discoverySources.id, id), eq(discoverySources.userId, userId));
+
+	/** Load a source owned by `userId`, or throw NotFoundError. */
+	const requireOwnedSource = async (id: string, userId: string) => {
+		const rows = await db
+			.select()
+			.from(discoverySources)
+			.where(ownedSource(id, userId))
+			.limit(1);
+		if (!rows[0]) throw new NotFoundError('Source not found');
+		return rows[0];
+	};
+
+	/** Delete a source owned by `userId`; shared by the delete and reject routes. */
+	const deleteOwnedSource = async (id: string, userId: string) => {
+		await requireOwnedSource(id, userId);
+		await db.delete(discoverySources).where(ownedSource(id, userId));
+	};
+
+	// ── Regions ──────────────────────────────────────────────────
+
+	// List the caller's regions.
+	app.get('/regions', async (c) => {
+		const user = c.get('user');
+		const regions = await db
+			.select()
+			.from(discoveryRegions)
+			.where(eq(discoveryRegions.userId, user.userId));
+		return c.json({ regions });
+	});
+
+	// Create a region for the caller.
+	app.post('/regions', async (c) => {
+		const user = c.get('user');
+		const data = parseBody(regionCreateSchema, await c.req.json().catch(() => null));
+
+		const [region] = await db
+			.insert(discoveryRegions)
+			.values({
+				userId: user.userId,
+				label: data.label,
+				lat: data.lat,
+				lon: data.lon,
+				radiusKm: data.radiusKm ?? 25,
+			})
+			.returning();
+		return c.json({ region }, 201);
+	});
+
+	// Update label, radius and/or active flag of one of the caller's regions.
+	app.put('/regions/:id', async (c) => {
+		const user = c.get('user');
+		const id = c.req.param('id');
+		const data = parseBody(regionUpdateSchema, await c.req.json().catch(() => null));
+
+		const existing = await db
+			.select()
+			.from(discoveryRegions)
+			.where(ownedRegion(id, user.userId))
+			.limit(1);
+		if (!existing[0]) throw new NotFoundError('Region not found');
+
+		const updates: Partial<typeof discoveryRegions.$inferInsert> = {};
+		if (data.label !== undefined) updates.label = data.label;
+		if (data.radiusKm !== undefined) updates.radiusKm = data.radiusKm;
+		if (data.isActive !== undefined) updates.isActive = data.isActive;
+
+		// All fields are optional, so a body such as `{}` is valid. Drizzle
+		// refuses an empty `.set()`, so treat that case as a no-op.
+		if (Object.keys(updates).length === 0) return c.json({ region: existing[0] });
+
+		const [region] = await db
+			.update(discoveryRegions)
+			.set(updates)
+			.where(ownedRegion(id, user.userId))
+			.returning();
+		return c.json({ region });
+	});
+
+	// Delete one of the caller's regions.
+	app.delete('/regions/:id', async (c) => {
+		const user = c.get('user');
+		const id = c.req.param('id');
+		const existing = await db
+			.select()
+			.from(discoveryRegions)
+			.where(ownedRegion(id, user.userId))
+			.limit(1);
+		if (!existing[0]) throw new NotFoundError('Region not found');
+
+		await db.delete(discoveryRegions).where(ownedRegion(id, user.userId));
+		return c.json({ deleted: true });
+	});
+
+	// ── Interests ────────────────────────────────────────────────
+
+	// List the caller's interests.
+	app.get('/interests', async (c) => {
+		const user = c.get('user');
+		const interests = await db
+			.select()
+			.from(discoveryInterests)
+			.where(eq(discoveryInterests.userId, user.userId));
+		return c.json({ interests });
+	});
+
+	// Create an interest for the caller.
+	app.post('/interests', async (c) => {
+		const user = c.get('user');
+		const data = parseBody(interestCreateSchema, await c.req.json().catch(() => null));
+
+		const [interest] = await db
+			.insert(discoveryInterests)
+			.values({
+				userId: user.userId,
+				category: data.category,
+				freetext: data.freetext ?? null,
+				weight: data.weight ?? 1.0,
+			})
+			.returning();
+		return c.json({ interest }, 201);
+	});
+
+	// Delete one of the caller's interests.
+	app.delete('/interests/:id', async (c) => {
+		const user = c.get('user');
+		const id = c.req.param('id');
+		const existing = await db
+			.select()
+			.from(discoveryInterests)
+			.where(ownedInterest(id, user.userId))
+			.limit(1);
+		if (!existing[0]) throw new NotFoundError('Interest not found');
+
+		await db.delete(discoveryInterests).where(ownedInterest(id, user.userId));
+		return c.json({ deleted: true });
+	});
+
+	// ── Sources ──────────────────────────────────────────────────
+
+	// List the caller's sources.
+	app.get('/sources', async (c) => {
+		const user = c.get('user');
+		const sources = await db
+			.select()
+			.from(discoverySources)
+			.where(eq(discoverySources.userId, user.userId));
+		return c.json({ sources });
+	});
+
+	// Register a source (iCal feed or website) under one of the caller's regions.
+	app.post('/sources', async (c) => {
+		const user = c.get('user');
+		const data = parseBody(sourceCreateSchema, await c.req.json().catch(() => null));
+
+		// Verify the region belongs to this user
+		const region = await db
+			.select()
+			.from(discoveryRegions)
+			.where(ownedRegion(data.regionId, user.userId))
+			.limit(1);
+		if (!region[0]) throw new BadRequestError('Region not found');
+
+		const [source] = await db
+			.insert(discoverySources)
+			.values({
+				userId: user.userId,
+				type: data.type,
+				url: data.url,
+				name: data.name,
+				regionId: data.regionId,
+				crawlIntervalHours: data.crawlIntervalHours ?? 24,
+			})
+			.returning();
+		return c.json({ source }, 201);
+	});
+
+	// Delete one of the caller's sources.
+	app.delete('/sources/:id', async (c) => {
+		const user = c.get('user');
+		await deleteOwnedSource(c.req.param('id'), user.userId);
+		return c.json({ deleted: true });
+	});
+
+	// Trigger an immediate crawl for a source
+	app.post('/sources/:id/crawl', async (c) => {
+		const user = c.get('user');
+		const id = c.req.param('id');
+		await requireOwnedSource(id, user.userId);
+
+		const result = await crawlSourceNow(db, id, crawlConfig);
+		return c.json(result);
+	});
+
+	// ── Source Discovery (Phase 2) ───────────────────────────────
+
+	// Auto-discover event sources for a region via web search
+	app.post('/regions/:id/discover-sources', async (c) => {
+		const user = c.get('user');
+		const regionId = c.req.param('id');
+		if (!config) throw new BadRequestError('Source discovery not configured');
+
+		// NOTE(review): unlike the other routes, region ownership is not checked
+		// here; the user id is only passed along — confirm that
+		// discoverSourcesForRegion enforces it.
+		const result = await discoverSourcesForRegion(
+			db,
+			regionId,
+			user.userId,
+			config.manaResearchUrl
+		);
+		return c.json(result);
+	});
+
+	// Activate a suggested source
+	app.put('/sources/:id/activate', async (c) => {
+		const user = c.get('user');
+		const id = c.req.param('id');
+		await requireOwnedSource(id, user.userId);
+
+		const [source] = await db
+			.update(discoverySources)
+			.set({ isActive: true, updatedAt: new Date() })
+			.where(ownedSource(id, user.userId))
+			.returning();
+
+		// Trigger immediate crawl for the newly activated source. Deliberately not
+		// awaited so the response is not delayed; a failure is logged rather than
+		// silently dropped.
+		crawlSourceNow(db, id, crawlConfig).catch((err) => {
+			console.error(`[discovery] initial crawl failed for source ${id}:`, err);
+		});
+
+		return c.json({ source });
+	});
+
+	// Reject a suggested source
+	app.delete('/sources/:id/reject', async (c) => {
+		const user = c.get('user');
+		await deleteOwnedSource(c.req.param('id'), user.userId);
+		return c.json({ deleted: true });
+	});
+
+	return app;
+}