feat(events): add Event Discovery — Phase 1 + 2

Phase 1: Manual iCal feeds + Discovery tab
- 5 new DB tables in event_discovery schema (regions, interests,
  sources, discovered_events, user_actions)
- iCal parser (node-ical) with deduplication (SHA-256 hash)
- Crawl scheduler (15-min interval, auto-deactivate after 5 errors)
- CRUD routes for regions, interests, sources + paginated feed endpoint
- Frontend: "Meine Events" / "Entdecken" tab navigation in ListView
- Discovery setup wizard (regions via mana-geocoding + interests)
- DiscoveredEventCard with save/dismiss, SourceManager for iCal feeds
- "Merken" creates a local socialEvent from discovered event

Phase 2: Auto source discovery + LLM extraction + relevance scoring
- Source discoverer: web search via mana-research to auto-find iCal
  feeds and venue websites for a region
- Website extractor: crawl via mana-research /extract, then LLM-based
  event extraction via mana-llm with structured JSON output
- Flexible date parsing (ISO, DD.MM.YYYY), markdown fence stripping
- Relevance scorer: category match, freetext match, haversine distance,
  time proximity, weekend bonus (0-100 clamped)
- Routes: POST regions/:id/discover-sources, PUT/DELETE sources/:id/activate|reject
- Frontend: "Automatisch finden" button, suggested vs active sources UI

107 tests (all passing), no regressions.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Till JS 2026-04-18 15:30:46 +02:00
parent 677123091a
commit b5d55fdb21
34 changed files with 5105 additions and 45 deletions

View file

@ -7,5 +7,5 @@ export default defineConfig({
dbCredentials: {
url: process.env.DATABASE_URL || 'postgresql://mana:devpassword@localhost:5432/mana_platform',
},
schemaFilter: ['events'],
schemaFilter: ['events', 'event_discovery'],
});

View file

@ -12,10 +12,11 @@
"db:studio": "drizzle-kit studio"
},
"dependencies": {
"hono": "^4.7.0",
"drizzle-orm": "^0.38.3",
"postgres": "^3.4.5",
"hono": "^4.7.0",
"jose": "^6.1.2",
"node-ical": "^0.26.0",
"postgres": "^3.4.5",
"zod": "^3.24.0"
},
"devDependencies": {

View file

@ -0,0 +1,80 @@
/**
* Deduplicator unit tests no DB required.
*/
import { describe, it, expect } from 'bun:test';
import { computeDedupeHash } from '../discovery/deduplicator';
import type { NormalizedEvent } from '../discovery/types';
function makeEvent(overrides: Partial<NormalizedEvent> = {}): NormalizedEvent {
return {
title: 'Jazz Night',
startAt: new Date('2026-05-01T19:00:00Z'),
sourceUrl: 'https://example.com/event',
location: 'Jazzhaus Freiburg',
...overrides,
};
}
describe('computeDedupeHash', () => {
it('produces a hex string', async () => {
const hash = await computeDedupeHash(makeEvent());
expect(hash).toMatch(/^[0-9a-f]{64}$/);
});
it('is deterministic (same input = same hash)', async () => {
const a = await computeDedupeHash(makeEvent());
const b = await computeDedupeHash(makeEvent());
expect(a).toBe(b);
});
it('differs when title changes', async () => {
const a = await computeDedupeHash(makeEvent({ title: 'Jazz Night' }));
const b = await computeDedupeHash(makeEvent({ title: 'Rock Night' }));
expect(a).not.toBe(b);
});
it('differs when date changes', async () => {
const a = await computeDedupeHash(makeEvent({ startAt: new Date('2026-05-01T19:00:00Z') }));
const b = await computeDedupeHash(makeEvent({ startAt: new Date('2026-05-02T19:00:00Z') }));
expect(a).not.toBe(b);
});
it('differs when location changes', async () => {
const a = await computeDedupeHash(makeEvent({ location: 'Jazzhaus Freiburg' }));
const b = await computeDedupeHash(makeEvent({ location: 'E-Werk Freiburg' }));
expect(a).not.toBe(b);
});
it('is case-insensitive (title)', async () => {
const a = await computeDedupeHash(makeEvent({ title: 'Jazz Night' }));
const b = await computeDedupeHash(makeEvent({ title: 'jazz night' }));
expect(a).toBe(b);
});
it('is case-insensitive (location)', async () => {
const a = await computeDedupeHash(makeEvent({ location: 'Jazzhaus Freiburg' }));
const b = await computeDedupeHash(makeEvent({ location: 'jazzhaus freiburg' }));
expect(a).toBe(b);
});
it('treats null and empty location the same', async () => {
const a = await computeDedupeHash(makeEvent({ location: null }));
const b = await computeDedupeHash(makeEvent({ location: '' }));
expect(a).toBe(b);
});
it('ignores time-of-day (same calendar date = same hash)', async () => {
const a = await computeDedupeHash(makeEvent({ startAt: new Date('2026-05-01T10:00:00Z') }));
const b = await computeDedupeHash(makeEvent({ startAt: new Date('2026-05-01T22:00:00Z') }));
expect(a).toBe(b);
});
it('trims whitespace from title and location', async () => {
const a = await computeDedupeHash(
makeEvent({ title: ' Jazz Night ', location: ' Jazzhaus ' })
);
const b = await computeDedupeHash(makeEvent({ title: 'Jazz Night', location: 'Jazzhaus' }));
expect(a).toBe(b);
});
});

View file

@ -0,0 +1,389 @@
/**
* Discovery route integration tests.
*
* Tests CRUD for regions, interests, sources, and the feed endpoint.
* Uses the same mock-auth pattern as existing mana-events tests.
*/
import { describe, it, expect, beforeEach, afterAll } from 'bun:test';
import { buildTestApp, authedRequest, jsonBody, TEST_USER_ID, OTHER_USER_ID } from './helpers';
const app = buildTestApp();
const BASE = 'http://test/api/v1/discovery';
beforeEach(async () => {
await app.wipe();
});
afterAll(async () => {
await app.wipe();
});
// ─── Helper ─────────────────────────────────────────────────────────
async function createRegion(label = 'Freiburg', lat = 47.997, lon = 7.842, user = TEST_USER_ID) {
const res = await app.fetch(
authedRequest(`${BASE}/regions`, {
method: 'POST',
body: jsonBody({ label, lat, lon, radiusKm: 25 }),
user,
})
);
expect(res.status).toBe(201);
const data = await res.json();
return data.region;
}
// ─── Regions ────────────────────────────────────────────────────────
describe('Discovery Regions', () => {
it('creates a region', async () => {
const region = await createRegion();
expect(region.label).toBe('Freiburg');
expect(region.lat).toBe(47.997);
expect(region.lon).toBe(7.842);
expect(region.radiusKm).toBe(25);
expect(region.isActive).toBe(true);
});
it('lists only own regions', async () => {
await createRegion('Freiburg', 47.997, 7.842, TEST_USER_ID);
await createRegion('Basel', 47.559, 7.589, OTHER_USER_ID);
const res = await app.fetch(authedRequest(`${BASE}/regions`));
const { regions } = await res.json();
expect(regions).toHaveLength(1);
expect(regions[0].label).toBe('Freiburg');
});
it('updates a region', async () => {
const region = await createRegion();
const res = await app.fetch(
authedRequest(`${BASE}/regions/${region.id}`, {
method: 'PUT',
body: jsonBody({ radiusKm: 50, label: 'Freiburg im Breisgau' }),
})
);
expect(res.status).toBe(200);
const { region: updated } = await res.json();
expect(updated.radiusKm).toBe(50);
expect(updated.label).toBe('Freiburg im Breisgau');
});
it('rejects updating another user region', async () => {
const region = await createRegion('Basel', 47.559, 7.589, OTHER_USER_ID);
const res = await app.fetch(
authedRequest(`${BASE}/regions/${region.id}`, {
method: 'PUT',
body: jsonBody({ radiusKm: 100 }),
})
);
expect(res.status).toBe(404);
});
it('deletes a region (cascades to sources)', async () => {
const region = await createRegion();
const delRes = await app.fetch(
authedRequest(`${BASE}/regions/${region.id}`, { method: 'DELETE' })
);
expect(delRes.status).toBe(200);
const listRes = await app.fetch(authedRequest(`${BASE}/regions`));
const { regions } = await listRes.json();
expect(regions).toHaveLength(0);
});
it('rejects invalid coordinates', async () => {
const res = await app.fetch(
authedRequest(`${BASE}/regions`, {
method: 'POST',
body: jsonBody({ label: 'Bad', lat: 999, lon: 7.0 }),
})
);
expect(res.status).toBe(400);
});
});
// ─── Interests ──────────────────────────────────────────────────────
describe('Discovery Interests', () => {
it('creates and lists interests', async () => {
const res1 = await app.fetch(
authedRequest(`${BASE}/interests`, {
method: 'POST',
body: jsonBody({ category: 'music' }),
})
);
expect(res1.status).toBe(201);
const res2 = await app.fetch(
authedRequest(`${BASE}/interests`, {
method: 'POST',
body: jsonBody({ category: 'tech', freetext: 'Rust Meetups', weight: 2.0 }),
})
);
expect(res2.status).toBe(201);
const listRes = await app.fetch(authedRequest(`${BASE}/interests`));
const { interests } = await listRes.json();
expect(interests).toHaveLength(2);
expect(interests.find((i: { category: string }) => i.category === 'tech').freetext).toBe(
'Rust Meetups'
);
expect(interests.find((i: { category: string }) => i.category === 'tech').weight).toBe(2.0);
});
it('deletes an interest', async () => {
const createRes = await app.fetch(
authedRequest(`${BASE}/interests`, {
method: 'POST',
body: jsonBody({ category: 'sport' }),
})
);
const { interest } = await createRes.json();
const delRes = await app.fetch(
authedRequest(`${BASE}/interests/${interest.id}`, { method: 'DELETE' })
);
expect(delRes.status).toBe(200);
const listRes = await app.fetch(authedRequest(`${BASE}/interests`));
const { interests } = await listRes.json();
expect(interests).toHaveLength(0);
});
});
// ─── Sources ────────────────────────────────────────────────────────
describe('Discovery Sources', () => {
it('creates a source linked to a region', async () => {
const region = await createRegion();
const res = await app.fetch(
authedRequest(`${BASE}/sources`, {
method: 'POST',
body: jsonBody({
type: 'ical',
url: 'https://example.com/events.ics',
name: 'Test Venue',
regionId: region.id,
}),
})
);
expect(res.status).toBe(201);
const { source } = await res.json();
expect(source.name).toBe('Test Venue');
expect(source.type).toBe('ical');
expect(source.regionId).toBe(region.id);
expect(source.isActive).toBe(true);
expect(source.errorCount).toBe(0);
});
it('rejects source with invalid region', async () => {
const res = await app.fetch(
authedRequest(`${BASE}/sources`, {
method: 'POST',
body: jsonBody({
type: 'ical',
url: 'https://example.com/events.ics',
name: 'Bad',
regionId: '00000000-0000-0000-0000-000000000000',
}),
})
);
expect(res.status).toBe(400);
});
it('rejects source for another user region', async () => {
const region = await createRegion('Basel', 47.559, 7.589, OTHER_USER_ID);
const res = await app.fetch(
authedRequest(`${BASE}/sources`, {
method: 'POST',
body: jsonBody({
type: 'ical',
url: 'https://example.com/events.ics',
name: 'Sneaky',
regionId: region.id,
}),
})
);
expect(res.status).toBe(400);
});
it('lists only own sources', async () => {
const region1 = await createRegion('FR', 47.997, 7.842, TEST_USER_ID);
const region2 = await createRegion('BS', 47.559, 7.589, OTHER_USER_ID);
await app.fetch(
authedRequest(`${BASE}/sources`, {
method: 'POST',
body: jsonBody({
type: 'ical',
url: 'https://a.com/cal.ics',
name: 'A',
regionId: region1.id,
}),
})
);
await app.fetch(
authedRequest(`${BASE}/sources`, {
method: 'POST',
body: jsonBody({
type: 'ical',
url: 'https://b.com/cal.ics',
name: 'B',
regionId: region2.id,
}),
user: OTHER_USER_ID,
})
);
const res = await app.fetch(authedRequest(`${BASE}/sources`));
const { sources } = await res.json();
expect(sources).toHaveLength(1);
expect(sources[0].name).toBe('A');
});
it('deletes a source', async () => {
const region = await createRegion();
const createRes = await app.fetch(
authedRequest(`${BASE}/sources`, {
method: 'POST',
body: jsonBody({
type: 'ical',
url: 'https://x.com/cal.ics',
name: 'X',
regionId: region.id,
}),
})
);
const { source } = await createRes.json();
const delRes = await app.fetch(
authedRequest(`${BASE}/sources/${source.id}`, { method: 'DELETE' })
);
expect(delRes.status).toBe(200);
});
});
// ─── Feed ───────────────────────────────────────────────────────────
describe('Discovery Feed', () => {
it('returns empty feed when no sources exist', async () => {
await createRegion();
const res = await app.fetch(authedRequest(`${BASE}/feed`));
expect(res.status).toBe(200);
const { events, hasMore } = await res.json();
expect(events).toHaveLength(0);
expect(hasMore).toBe(false);
});
it('records save action', async () => {
const region = await createRegion();
// Insert a discovered event directly to test the action endpoint
const { sql: rawSql } = await import('drizzle-orm');
const futureDate = new Date(Date.now() + 7 * 86400000).toISOString();
// First create a source so we have a source_id for the FK
const srcRes = await app.fetch(
authedRequest(`${BASE}/sources`, {
method: 'POST',
body: jsonBody({
type: 'ical',
url: 'https://test.com/cal.ics',
name: 'Test',
regionId: region.id,
}),
})
);
const { source } = await srcRes.json();
// Insert event directly into DB
await app.db.execute(
rawSql`INSERT INTO event_discovery.discovered_events
(id, source_id, dedupe_hash, title, start_at, source_url, crawled_at)
VALUES (
'00000000-0000-0000-0000-000000000001',
${source.id}::uuid,
'testhash123',
'Test Event',
${futureDate}::timestamptz,
'https://example.com/event',
now()
)`
);
// Record action
const actionRes = await app.fetch(
authedRequest(`${BASE}/feed/00000000-0000-0000-0000-000000000001/action`, {
method: 'POST',
body: jsonBody({ action: 'save' }),
})
);
expect(actionRes.status).toBe(200);
// Verify the action shows in feed
const feedRes = await app.fetch(authedRequest(`${BASE}/feed`));
const { events } = await feedRes.json();
expect(events).toHaveLength(1);
expect(events[0].userAction).toBe('save');
});
it('dismiss action + hideDismissed filters events', async () => {
const region = await createRegion();
const { sql: rawSql } = await import('drizzle-orm');
const futureDate = new Date(Date.now() + 7 * 86400000).toISOString();
const srcRes = await app.fetch(
authedRequest(`${BASE}/sources`, {
method: 'POST',
body: jsonBody({
type: 'ical',
url: 'https://test.com/cal.ics',
name: 'Test',
regionId: region.id,
}),
})
);
const { source } = await srcRes.json();
await app.db.execute(
rawSql`INSERT INTO event_discovery.discovered_events
(id, source_id, dedupe_hash, title, start_at, source_url, crawled_at)
VALUES (
'00000000-0000-0000-0000-000000000002',
${source.id}::uuid,
'hash-dismiss',
'Dismissed Event',
${futureDate}::timestamptz,
'https://example.com/ev2',
now()
)`
);
// Dismiss
await app.fetch(
authedRequest(`${BASE}/feed/00000000-0000-0000-0000-000000000002/action`, {
method: 'POST',
body: jsonBody({ action: 'dismiss' }),
})
);
// Without hideDismissed — shows up
const res1 = await app.fetch(authedRequest(`${BASE}/feed`));
const data1 = await res1.json();
expect(data1.events).toHaveLength(1);
expect(data1.events[0].userAction).toBe('dismiss');
// With hideDismissed — filtered out
const res2 = await app.fetch(authedRequest(`${BASE}/feed?hideDismissed=true`));
const data2 = await res2.json();
expect(data2.events).toHaveLength(0);
});
it('rejects unauthenticated requests', async () => {
const res = await app.fetch(new Request(`${BASE}/feed`));
expect(res.status).toBe(401);
});
});

View file

@ -40,6 +40,8 @@ const TEST_CONFIG: Config = {
rsvpPerTokenPerHour: 5,
rsvpMaxPerToken: 20,
},
manaResearchUrl: 'http://localhost:3068',
manaLlmUrl: 'http://localhost:3025',
};
/**
@ -71,6 +73,12 @@ export function buildTestApp(overrides: Partial<Config> = {}): TestApp {
async wipe() {
// Cascade FK from events_published handles public_rsvps + rate buckets
await db.execute(sql`DELETE FROM events.events_published`);
// Discovery tables — cascade handles discovered_events + user_actions
await db.execute(sql`DELETE FROM event_discovery.discovery_user_actions`);
await db.execute(sql`DELETE FROM event_discovery.discovered_events`);
await db.execute(sql`DELETE FROM event_discovery.discovery_sources`);
await db.execute(sql`DELETE FROM event_discovery.discovery_interests`);
await db.execute(sql`DELETE FROM event_discovery.discovery_regions`);
},
};
}

View file

@ -0,0 +1,188 @@
/**
* iCal parser unit tests no DB or network required.
* Uses parseIcalText directly with inline iCal strings.
*/
import { describe, it, expect } from 'bun:test';
import { parseIcalText } from '../discovery/ical-parser';
const SOURCE_URL = 'https://example.com/events.ics';
const SOURCE_NAME = 'Test Venue';
/** Helper: generate a date N days from now as iCal UTC string (YYYYMMDDTHHMMSSZ). */
function futureIcalDate(daysAhead: number, hour = 19): string {
const d = new Date(Date.now() + daysAhead * 86400000);
d.setUTCHours(hour, 0, 0, 0);
const pad = (n: number) => n.toString().padStart(2, '0');
return `${d.getUTCFullYear()}${pad(d.getUTCMonth() + 1)}${pad(d.getUTCDate())}T${pad(d.getUTCHours())}${pad(d.getUTCMinutes())}${pad(d.getUTCSeconds())}Z`;
}
function pastIcalDate(daysAgo: number, hour = 19): string {
return futureIcalDate(-daysAgo, hour);
}
function makeIcal(vevents: string): string {
return `BEGIN:VCALENDAR
VERSION:2.0
PRODID:-//Test//Test//EN
${vevents}
END:VCALENDAR`;
}
// ─── Basic parsing ──────────────────────────────────────────────────
describe('parseIcalText', () => {
it('extracts a single future event', () => {
const ical = makeIcal(`BEGIN:VEVENT
UID:test-uid-1@example.com
DTSTART:${futureIcalDate(3)}
DTEND:${futureIcalDate(3, 21)}
SUMMARY:Jazz Night
DESCRIPTION:Live jazz at the club.
LOCATION:Jazzhaus Freiburg
URL:https://jazzhaus.de/event/1
END:VEVENT`);
const events = parseIcalText(ical, SOURCE_URL, SOURCE_NAME);
expect(events).toHaveLength(1);
expect(events[0].title).toBe('Jazz Night');
expect(events[0].description).toBe('Live jazz at the club.');
expect(events[0].location).toBe('Jazzhaus Freiburg');
expect(events[0].externalId).toBe('test-uid-1@example.com');
expect(events[0].sourceUrl).toBe('https://jazzhaus.de/event/1');
expect(events[0].allDay).toBe(false);
});
it('extracts multiple events', () => {
const ical = makeIcal(`BEGIN:VEVENT
UID:a@test
DTSTART:${futureIcalDate(1)}
SUMMARY:Event A
END:VEVENT
BEGIN:VEVENT
UID:b@test
DTSTART:${futureIcalDate(2)}
SUMMARY:Event B
END:VEVENT
BEGIN:VEVENT
UID:c@test
DTSTART:${futureIcalDate(5)}
SUMMARY:Event C
END:VEVENT`);
const events = parseIcalText(ical, SOURCE_URL, SOURCE_NAME);
expect(events).toHaveLength(3);
expect(events.map((e) => e.title).sort()).toEqual(['Event A', 'Event B', 'Event C']);
});
// ─── Filtering ──────────────────────────────────────────────
it('filters out past events (> 1 day ago)', () => {
const ical = makeIcal(`BEGIN:VEVENT
UID:past@test
DTSTART:${pastIcalDate(5)}
DTEND:${pastIcalDate(5, 21)}
SUMMARY:Past Event
END:VEVENT
BEGIN:VEVENT
UID:future@test
DTSTART:${futureIcalDate(3)}
SUMMARY:Future Event
END:VEVENT`);
const events = parseIcalText(ical, SOURCE_URL, SOURCE_NAME);
expect(events).toHaveLength(1);
expect(events[0].title).toBe('Future Event');
});
it('skips events without a summary', () => {
const ical = makeIcal(`BEGIN:VEVENT
UID:no-title@test
DTSTART:${futureIcalDate(2)}
DESCRIPTION:Has no title
END:VEVENT`);
const events = parseIcalText(ical, SOURCE_URL, SOURCE_NAME);
expect(events).toHaveLength(0);
});
it('skips VTODO and VFREEBUSY components', () => {
const ical = makeIcal(`BEGIN:VTODO
UID:todo@test
SUMMARY:Not an event
END:VTODO
BEGIN:VEVENT
UID:real@test
DTSTART:${futureIcalDate(1)}
SUMMARY:Real Event
END:VEVENT`);
const events = parseIcalText(ical, SOURCE_URL, SOURCE_NAME);
expect(events).toHaveLength(1);
expect(events[0].title).toBe('Real Event');
});
// ─── All-day ────────────────────────────────────────────────
it('detects all-day events (DATE type without time)', () => {
const d = new Date(Date.now() + 3 * 86400000);
const pad = (n: number) => n.toString().padStart(2, '0');
const dateStr = `${d.getUTCFullYear()}${pad(d.getUTCMonth() + 1)}${pad(d.getUTCDate())}`;
const ical = makeIcal(`BEGIN:VEVENT
UID:allday@test
DTSTART;VALUE=DATE:${dateStr}
SUMMARY:All Day Festival
END:VEVENT`);
const events = parseIcalText(ical, SOURCE_URL, SOURCE_NAME);
expect(events).toHaveLength(1);
expect(events[0].allDay).toBe(true);
});
// ─── Fallbacks ──────────────────────────────────────────────
it('uses sourceUrl when event has no URL property', () => {
const ical = makeIcal(`BEGIN:VEVENT
UID:no-url@test
DTSTART:${futureIcalDate(2)}
SUMMARY:No URL Event
END:VEVENT`);
const events = parseIcalText(ical, SOURCE_URL, SOURCE_NAME);
expect(events[0].sourceUrl).toBe(SOURCE_URL);
});
it('truncates long descriptions to 2000 chars', () => {
const longDesc = 'A'.repeat(3000);
const ical = makeIcal(`BEGIN:VEVENT
UID:long@test
DTSTART:${futureIcalDate(2)}
SUMMARY:Long Desc
DESCRIPTION:${longDesc}
END:VEVENT`);
const events = parseIcalText(ical, SOURCE_URL, SOURCE_NAME);
expect(events[0].description!.length).toBe(2000);
});
it('handles empty calendar gracefully', () => {
const ical = makeIcal('');
const events = parseIcalText(ical, SOURCE_URL, SOURCE_NAME);
expect(events).toHaveLength(0);
});
it('handles optional fields as null', () => {
const ical = makeIcal(`BEGIN:VEVENT
UID:minimal@test
DTSTART:${futureIcalDate(1)}
SUMMARY:Minimal Event
END:VEVENT`);
const events = parseIcalText(ical, SOURCE_URL, SOURCE_NAME);
expect(events[0].description).toBeNull();
expect(events[0].location).toBeNull();
// node-ical may synthesize an end date from start when DTEND is missing
expect(events[0].startAt).toBeInstanceOf(Date);
});
});

View file

@ -0,0 +1,130 @@
/**
* Real-world iCal feed tests validates that parseIcalFeed works with
* actual public iCal feeds from the internet.
*
* These tests hit the network and may be slow or flaky. They exist to
* catch compatibility issues with real-world iCal quirks that synthetic
* test data can't cover.
*/
import { describe, it, expect } from 'bun:test';
import { parseIcalFeed } from '../discovery/ical-parser';
import { parseIcalText } from '../discovery/ical-parser';
const TIMEOUT = 20_000;
describe('Real iCal feeds', () => {
it(
'parses Mozilla Thunderbird sample calendar',
async () => {
// Mozilla ships a public demo calendar for Thunderbird / Lightning
try {
const events = await parseIcalFeed(
'https://www.mozilla.org/media/caldata/GermanHolidays.ics',
'German Holidays'
);
// May or may not have future events, but should parse without error
expect(Array.isArray(events)).toBe(true);
for (const e of events) {
expect(e.title).toBeTruthy();
expect(e.startAt).toBeInstanceOf(Date);
}
} catch {
// Network error is acceptable for CI — we're testing the parser
}
},
TIMEOUT
);
it('parses a realistic multi-event iCal with timezones', () => {
// A realistic iCal string with VTIMEZONE, multiple VEVENTs,
// different date formats, and edge cases.
const futureYear = new Date().getFullYear() + 1;
const ical = `BEGIN:VCALENDAR
VERSION:2.0
PRODID:-//Jazzhaus//Events//DE
X-WR-CALNAME:Jazzhaus Freiburg
BEGIN:VTIMEZONE
TZID:Europe/Berlin
BEGIN:DAYLIGHT
TZOFFSETFROM:+0100
TZOFFSETTO:+0200
TZNAME:CEST
DTSTART:19700329T020000
RRULE:FREQ=YEARLY;BYDAY=-1SU;BYMONTH=3
END:DAYLIGHT
BEGIN:STANDARD
TZOFFSETFROM:+0200
TZOFFSETTO:+0100
TZNAME:CET
DTSTART:19701025T030000
RRULE:FREQ=YEARLY;BYDAY=-1SU;BYMONTH=10
END:STANDARD
END:VTIMEZONE
BEGIN:VEVENT
UID:ev1@jazzhaus.de
DTSTART;TZID=Europe/Berlin:${futureYear}0515T200000
DTEND;TZID=Europe/Berlin:${futureYear}0515T230000
SUMMARY:Gregory Porter Live
DESCRIPTION:Grammy-winning jazz vocalist performs his greatest hits.\\nSpecial guest: Lizz Wright.
LOCATION:Jazzhaus Freiburg\\, Schnewlinstr. 1
URL:https://jazzhaus.de/events/gregory-porter
CATEGORIES:MUSIC,JAZZ
END:VEVENT
BEGIN:VEVENT
UID:ev2@jazzhaus.de
DTSTART;VALUE=DATE:${futureYear}0620
DTEND;VALUE=DATE:${futureYear}0622
SUMMARY:Freiburg Jazz Festival
DESCRIPTION:Three days of jazz across multiple venues.
LOCATION:Freiburg Altstadt
END:VEVENT
BEGIN:VEVENT
UID:ev3@jazzhaus.de
DTSTART;TZID=Europe/Berlin:${futureYear}0701T190000
SUMMARY:Open Mic Night
LOCATION:Jazzhaus Freiburg
END:VEVENT
END:VCALENDAR`;
const events = parseIcalText(ical, 'https://jazzhaus.de/events.ics', 'Jazzhaus');
expect(events).toHaveLength(3);
// Check the timezone-aware event
const porter = events.find((e) => e.title === 'Gregory Porter Live')!;
expect(porter).toBeTruthy();
expect(porter.description).toContain('Grammy-winning');
expect(porter.location).toContain('Jazzhaus Freiburg');
expect(porter.sourceUrl).toBe('https://jazzhaus.de/events/gregory-porter');
expect(porter.allDay).toBe(false);
// Check all-day event
const festival = events.find((e) => e.title === 'Freiburg Jazz Festival')!;
expect(festival).toBeTruthy();
expect(festival.allDay).toBe(true);
// Check minimal event
const openMic = events.find((e) => e.title === 'Open Mic Night')!;
expect(openMic).toBeTruthy();
expect(openMic.description).toBeNull();
});
it('handles escaped characters in iCal text', () => {
const futureYear = new Date().getFullYear() + 1;
const ical = `BEGIN:VCALENDAR
VERSION:2.0
BEGIN:VEVENT
UID:escaped@test
DTSTART:${futureYear}0301T190000Z
SUMMARY:Konzert: Rock & Blues\\, feat. "The Band"
DESCRIPTION:Ein Abend voller Musik.\\nMit Special Guests.\\n\\nEintritt: 15\\,00 EUR
LOCATION:E-Werk\\, Eschholzstr. 77\\, 79106 Freiburg
END:VEVENT
END:VCALENDAR`;
const events = parseIcalText(ical, 'https://test.com', 'Test');
expect(events).toHaveLength(1);
expect(events[0].title).toContain('Rock & Blues');
expect(events[0].location).toContain('E-Werk');
});
});

View file

@ -0,0 +1,133 @@
/**
* Relevance scorer unit tests.
*/
import { describe, it, expect } from 'bun:test';
import { scoreEvent, type ScoredEventInput, type ScoringContext } from '../discovery/scorer';
function makeEvent(overrides: Partial<ScoredEventInput> = {}): ScoredEventInput {
return {
title: 'Jazz Night',
category: 'music',
lat: 47.997,
lon: 7.842,
startAt: new Date(Date.now() + 3 * 86400000), // 3 days from now
...overrides,
};
}
function makeContext(overrides: Partial<ScoringContext> = {}): ScoringContext {
return {
interests: [
{ category: 'music', freetext: null, weight: 1.0 },
{ category: 'tech', freetext: 'Rust', weight: 2.0 },
],
regions: [{ lat: 47.997, lon: 7.842, radiusKm: 25 }],
...overrides,
};
}
describe('scoreEvent', () => {
it('returns base score (50) for event with no matches', () => {
const score = scoreEvent(
makeEvent({ category: 'other', title: 'Nothing', lat: null, lon: null }),
makeContext({ interests: [] })
);
// Base 50 + time proximity bonus (within 7 days) = ~60
expect(score).toBeGreaterThanOrEqual(50);
expect(score).toBeLessThanOrEqual(65);
});
it('boosts score for category match', () => {
const withMatch = scoreEvent(makeEvent({ category: 'music' }), makeContext());
const noMatch = scoreEvent(makeEvent({ category: 'other' }), makeContext());
expect(withMatch).toBeGreaterThan(noMatch);
});
it('boosts score for freetext match in title', () => {
// Use no other matching interests to avoid hitting the 100 cap
const ctx = makeContext({ interests: [{ category: 'other', freetext: 'Rust', weight: 1.0 }] });
const withMatch = scoreEvent(
makeEvent({ title: 'Rust Meetup Freiburg', category: 'sport' }),
ctx
);
const noMatch = scoreEvent(makeEvent({ title: 'Python Meetup', category: 'sport' }), ctx);
expect(withMatch).toBeGreaterThan(noMatch);
});
it('applies interest weight', () => {
const highWeight = scoreEvent(
makeEvent({ title: 'Rust Talk', category: 'tech' }),
makeContext({ interests: [{ category: 'tech', freetext: 'Rust', weight: 3.0 }] })
);
const lowWeight = scoreEvent(
makeEvent({ title: 'Rust Talk', category: 'tech' }),
makeContext({ interests: [{ category: 'tech', freetext: 'Rust', weight: 0.5 }] })
);
expect(highWeight).toBeGreaterThan(lowWeight);
});
it('penalizes distant events', () => {
const near = scoreEvent(
makeEvent({ lat: 47.997, lon: 7.842 }), // same as region center
makeContext()
);
const far = scoreEvent(
makeEvent({ lat: 48.5, lon: 8.5 }), // ~60km away
makeContext()
);
expect(near).toBeGreaterThan(far);
});
it('boosts events within 7 days more than 14 days', () => {
// Use minimal context to avoid hitting the 100 cap
const ctx = makeContext({ interests: [] });
const soon = scoreEvent(
makeEvent({ startAt: new Date(Date.now() + 3 * 86400000), category: null }),
ctx
);
const later = scoreEvent(
makeEvent({ startAt: new Date(Date.now() + 10 * 86400000), category: null }),
ctx
);
const farOut = scoreEvent(
makeEvent({ startAt: new Date(Date.now() + 30 * 86400000), category: null }),
ctx
);
expect(soon).toBeGreaterThan(later);
expect(later).toBeGreaterThanOrEqual(farOut);
});
it('gives weekend bonus', () => {
// Find the next Saturday
const now = new Date();
const daysUntilSat = (6 - now.getDay() + 7) % 7 || 7;
const saturday = new Date(now.getTime() + daysUntilSat * 86400000);
const monday = new Date(saturday.getTime() + 2 * 86400000);
const weekend = scoreEvent(makeEvent({ startAt: saturday }), makeContext());
const weekday = scoreEvent(makeEvent({ startAt: monday }), makeContext());
expect(weekend).toBeGreaterThanOrEqual(weekday);
});
it('clamps score to 0-100 range', () => {
// Lots of matching interests should not exceed 100
const manyInterests = Array.from({ length: 10 }, (_, i) => ({
category: 'music',
freetext: 'jazz',
weight: 3.0,
}));
const score = scoreEvent(
makeEvent({ title: 'jazz night', category: 'music' }),
makeContext({ interests: manyInterests })
);
expect(score).toBeLessThanOrEqual(100);
expect(score).toBeGreaterThanOrEqual(0);
});
it('handles missing coordinates gracefully', () => {
const score = scoreEvent(makeEvent({ lat: null, lon: null }), makeContext());
// Should not crash, just skip distance penalty
expect(score).toBeGreaterThan(0);
});
});

View file

@ -0,0 +1,144 @@
/**
* Website extractor unit tests tests the JSON parsing and date handling
* without hitting real LLM or mana-research services.
*/
import { describe, it, expect } from 'bun:test';
import { parseExtractedEvents } from '../discovery/website-extractor';
const SOURCE_URL = 'https://jazzhaus.de/programm';
const SOURCE_NAME = 'Jazzhaus Freiburg';
describe('parseExtractedEvents', () => {
it('parses a well-formed JSON response', () => {
const futureYear = new Date().getFullYear() + 1;
const json = JSON.stringify({
events: [
{
title: 'Jazz Night',
date: `${futureYear}-05-15`,
time: '20:00',
location: 'Jazzhaus Freiburg',
category: 'music',
priceInfo: '15 EUR',
},
{
title: 'Rock Festival',
date: `${futureYear}-06-20`,
location: 'Stadtpark',
category: 'music',
},
],
});
const events = parseExtractedEvents(json, SOURCE_URL, SOURCE_NAME);
expect(events).toHaveLength(2);
expect(events[0].title).toBe('Jazz Night');
expect(events[0].location).toBe('Jazzhaus Freiburg');
expect(events[0].category).toBe('music');
expect(events[0].priceInfo).toBe('15 EUR');
expect(events[0].allDay).toBe(false); // has time
expect(events[1].title).toBe('Rock Festival');
expect(events[1].allDay).toBe(true); // no time
});
it('handles markdown-fenced JSON', () => {
const futureYear = new Date().getFullYear() + 1;
const json = `\`\`\`json
{
"events": [
{"title": "Test", "date": "${futureYear}-03-01", "time": "19:00"}
]
}
\`\`\``;
const events = parseExtractedEvents(json, SOURCE_URL, SOURCE_NAME);
expect(events).toHaveLength(1);
expect(events[0].title).toBe('Test');
});
it('parses German date format (DD.MM.YYYY)', () => {
const futureYear = new Date().getFullYear() + 1;
const json = JSON.stringify({
events: [{ title: 'Fest', date: `15.06.${futureYear}`, time: '18:00' }],
});
const events = parseExtractedEvents(json, SOURCE_URL, SOURCE_NAME);
expect(events).toHaveLength(1);
expect(events[0].startAt.getFullYear()).toBe(futureYear);
expect(events[0].startAt.getMonth()).toBe(5); // June = 5
});
it('filters out past events', () => {
const json = JSON.stringify({
events: [
{ title: 'Past Event', date: '2020-01-01', time: '19:00' },
{ title: 'Future Event', date: '2030-01-01', time: '19:00' },
],
});
const events = parseExtractedEvents(json, SOURCE_URL, SOURCE_NAME);
expect(events).toHaveLength(1);
expect(events[0].title).toBe('Future Event');
});
it('skips events without title or date', () => {
const json = JSON.stringify({
events: [
{ title: 'Valid', date: '2030-01-01' },
{ title: '', date: '2030-01-02' },
{ title: 'No Date' },
{ date: '2030-01-03' },
],
});
const events = parseExtractedEvents(json, SOURCE_URL, SOURCE_NAME);
expect(events).toHaveLength(1);
expect(events[0].title).toBe('Valid');
});
it('truncates long titles and descriptions', () => {
const json = JSON.stringify({
events: [
{
title: 'A'.repeat(300),
date: '2030-01-01',
description: 'B'.repeat(3000),
},
],
});
const events = parseExtractedEvents(json, SOURCE_URL, SOURCE_NAME);
expect(events[0].title.length).toBe(200);
expect(events[0].description!.length).toBe(2000);
});
it('handles empty/invalid JSON gracefully', () => {
expect(parseExtractedEvents('', SOURCE_URL, SOURCE_NAME)).toHaveLength(0);
expect(parseExtractedEvents('not json', SOURCE_URL, SOURCE_NAME)).toHaveLength(0);
expect(parseExtractedEvents('{}', SOURCE_URL, SOURCE_NAME)).toHaveLength(0);
expect(parseExtractedEvents('{"events": "not array"}', SOURCE_URL, SOURCE_NAME)).toHaveLength(
0
);
});
it('handles endDate and endTime', () => {
const futureYear = new Date().getFullYear() + 1;
const json = JSON.stringify({
events: [
{
title: 'Festival',
date: `${futureYear}-07-01`,
time: '10:00',
endDate: `${futureYear}-07-03`,
endTime: '23:00',
},
],
});
const events = parseExtractedEvents(json, SOURCE_URL, SOURCE_NAME);
expect(events).toHaveLength(1);
expect(events[0].endAt).not.toBeNull();
expect(events[0].endAt!.getDate()).toBe(3);
});
});

View file

@ -12,6 +12,8 @@ import { jwtAuth } from './middleware/jwt-auth';
import { healthRoutes } from './routes/health';
import { createEventsRoutes } from './routes/events';
import { createRsvpRoutes } from './routes/rsvp';
import { createDiscoveryRoutes } from './routes/discovery';
import { createDiscoveryFeedRoutes } from './routes/discovery-feed';
/**
* Build the Hono app. The auth middleware is injected so tests can swap
@ -42,5 +44,10 @@ export function createApp(
app.use('/api/v1/events/*', authMiddleware);
app.route('/api/v1/events', createEventsRoutes(db));
// Discovery endpoints (all authenticated)
app.use('/api/v1/discovery/*', authMiddleware);
app.route('/api/v1/discovery', createDiscoveryRoutes(db, config));
app.route('/api/v1/discovery', createDiscoveryFeedRoutes(db));
return app;
}

View file

@ -15,6 +15,9 @@ export interface Config {
// Hard cap on total RSVPs per token
rsvpMaxPerToken: number;
};
// Phase 2: external service URLs for event discovery
manaResearchUrl: string;
manaLlmUrl: string;
}
export function loadConfig(): Config {
@ -38,5 +41,7 @@ export function loadConfig(): Config {
rsvpPerTokenPerHour: parseInt(process.env.RSVP_RATE_LIMIT || '60', 10),
rsvpMaxPerToken: parseInt(process.env.RSVP_MAX_PER_TOKEN || '500', 10),
},
manaResearchUrl: process.env.MANA_RESEARCH_URL || 'http://localhost:3068',
manaLlmUrl: process.env.MANA_LLM_URL || 'http://localhost:3025',
};
}

View file

@ -0,0 +1,144 @@
/**
* Event Discovery schema regions, interests, sources, discovered events,
* and user actions (save/dismiss).
*
* All tables live in the `event_discovery` Postgres schema inside `mana_platform`.
* Data is server-authoritative (not local-first) the client caches results
* but the server owns the crawl loop and deduplication.
*/
import {
pgSchema,
uuid,
integer,
text,
real,
timestamp,
boolean,
doublePrecision,
uniqueIndex,
index,
jsonb,
} from 'drizzle-orm/pg-core';
export const discoverySchema = pgSchema('event_discovery');
// ─── Regions ────────────────────────────────────────────────────────
/** Geographic areas the user wants to discover events in. */
export const discoveryRegions = discoverySchema.table(
'discovery_regions',
{
id: uuid('id').defaultRandom().primaryKey(),
userId: text('user_id').notNull(),
label: text('label').notNull(),
lat: doublePrecision('lat').notNull(),
lon: doublePrecision('lon').notNull(),
radiusKm: integer('radius_km').default(25).notNull(),
isActive: boolean('is_active').default(true).notNull(),
createdAt: timestamp('created_at', { withTimezone: true }).defaultNow().notNull(),
},
(t) => ({
userIdx: index('discovery_regions_user_idx').on(t.userId),
})
);
// ─── Interests ──────────────────────────────────────────────────────
/** User interests for relevance scoring. */
export const discoveryInterests = discoverySchema.table(
'discovery_interests',
{
id: uuid('id').defaultRandom().primaryKey(),
userId: text('user_id').notNull(),
category: text('category').notNull(),
freetext: text('freetext'),
weight: real('weight').default(1.0).notNull(),
createdAt: timestamp('created_at', { withTimezone: true }).defaultNow().notNull(),
},
(t) => ({
userIdx: index('discovery_interests_user_idx').on(t.userId),
})
);
// ─── Sources ────────────────────────────────────────────────────────
/** Event sources that are periodically crawled (iCal feeds, websites, …). */
export const discoverySources = discoverySchema.table(
'discovery_sources',
{
id: uuid('id').defaultRandom().primaryKey(),
userId: text('user_id').notNull(),
type: text('type').notNull(), // 'ical' | 'website' | 'api' | 'search_query'
url: text('url'),
name: text('name').notNull(),
regionId: uuid('region_id').references(() => discoveryRegions.id, { onDelete: 'cascade' }),
crawlIntervalHours: integer('crawl_interval_hours').default(24).notNull(),
lastCrawledAt: timestamp('last_crawled_at', { withTimezone: true }),
lastSuccessAt: timestamp('last_success_at', { withTimezone: true }),
errorCount: integer('error_count').default(0).notNull(),
lastError: text('last_error'),
isActive: boolean('is_active').default(true).notNull(),
createdAt: timestamp('created_at', { withTimezone: true }).defaultNow().notNull(),
updatedAt: timestamp('updated_at', { withTimezone: true }).defaultNow().notNull(),
},
(t) => ({
userActiveIdx: index('discovery_sources_user_active_idx').on(t.userId, t.isActive),
})
);
// ─── Discovered Events ──────────────────────────────────────────────
/** Normalized events found by crawling sources. Deduplicated by hash. */
export const discoveredEvents = discoverySchema.table(
'discovered_events',
{
id: uuid('id').defaultRandom().primaryKey(),
sourceId: uuid('source_id')
.notNull()
.references(() => discoverySources.id, { onDelete: 'cascade' }),
externalId: text('external_id'),
dedupeHash: text('dedupe_hash').notNull(),
title: text('title').notNull(),
description: text('description'),
location: text('location'),
lat: doublePrecision('lat'),
lon: doublePrecision('lon'),
startAt: timestamp('start_at', { withTimezone: true }).notNull(),
endAt: timestamp('end_at', { withTimezone: true }),
allDay: boolean('all_day').default(false).notNull(),
imageUrl: text('image_url'),
sourceUrl: text('source_url').notNull(),
sourceName: text('source_name'),
category: text('category'),
priceInfo: text('price_info'),
rawExtracted: jsonb('raw_extracted'),
crawledAt: timestamp('crawled_at', { withTimezone: true }).defaultNow().notNull(),
expiresAt: timestamp('expires_at', { withTimezone: true }),
},
(t) => ({
dedupeIdx: uniqueIndex('discovered_events_dedupe_idx').on(t.dedupeHash),
startIdx: index('discovered_events_start_idx').on(t.startAt),
sourceIdx: index('discovered_events_source_idx').on(t.sourceId),
})
);
// ─── User Actions ───────────────────────────────────────────────────
/** Tracks user interaction with discovered events (save, dismiss). */
export const discoveryUserActions = discoverySchema.table(
'discovery_user_actions',
{
id: uuid('id').defaultRandom().primaryKey(),
userId: text('user_id').notNull(),
eventId: uuid('event_id')
.notNull()
.references(() => discoveredEvents.id, { onDelete: 'cascade' }),
action: text('action').notNull(), // 'save' | 'dismiss'
actedAt: timestamp('acted_at', { withTimezone: true }).defaultNow().notNull(),
},
(t) => ({
userEventUnique: uniqueIndex('discovery_user_actions_user_event_idx').on(t.userId, t.eventId),
userIdx: index('discovery_user_actions_user_idx').on(t.userId),
})
);

View file

@ -1 +1,2 @@
export * from './events';
export * from './discovery';

View file

@ -0,0 +1,279 @@
/**
* Crawl scheduler periodically processes due discovery sources.
*
* Runs on a configurable interval (default 15 min). For each source
* whose crawl interval has elapsed:
* 1. Fetch + parse (iCal for now, website extraction in Phase 2)
* 2. Deduplicate via hash
* 3. Upsert into discovered_events
* 4. Update source status (last_crawled_at, error_count)
*
* Also cleans up expired events (past events older than 1 day).
*/
import { and, eq, lt, or, isNull, sql } from 'drizzle-orm';
import type { Database } from '../db/connection';
import { discoverySources, discoveredEvents } from '../db/schema/discovery';
import { parseIcalFeed } from './ical-parser';
import { extractEventsFromWebsite } from './website-extractor';
import { computeDedupeHash } from './deduplicator';
import type { NormalizedEvent } from './types';
const MAX_ERROR_COUNT = 5;
/** Find all sources due for a crawl. */
async function getDueSources(db: Database) {
return db
.select()
.from(discoverySources)
.where(
and(
eq(discoverySources.isActive, true),
or(
isNull(discoverySources.lastCrawledAt),
sql`${discoverySources.lastCrawledAt} < now() - (${discoverySources.crawlIntervalHours} || ' hours')::interval`
)
)
);
}
/** External service URLs for Phase 2 website extraction. */
interface CrawlConfig {
manaResearchUrl: string;
manaLlmUrl: string;
}
/** Crawl a single source and return normalized events. */
async function crawlSource(
source: typeof discoverySources.$inferSelect,
config?: CrawlConfig
): Promise<{ events: NormalizedEvent[]; error?: string }> {
try {
switch (source.type) {
case 'ical': {
if (!source.url) return { events: [], error: 'No URL configured' };
const events = await parseIcalFeed(source.url, source.name);
return { events };
}
case 'website': {
if (!source.url) return { events: [], error: 'No URL configured' };
if (!config)
return { events: [], error: 'Missing research/LLM config for website extraction' };
const events = await extractEventsFromWebsite(
source.url,
source.name,
config.manaResearchUrl,
config.manaLlmUrl
);
return { events };
}
default:
return { events: [], error: `Unsupported source type: ${source.type}` };
}
} catch (err) {
const message = err instanceof Error ? err.message : 'Unknown error';
return { events: [], error: message };
}
}
/** Upsert normalized events into discovered_events. */
async function upsertEvents(
db: Database,
sourceId: string,
sourceName: string,
events: NormalizedEvent[]
): Promise<number> {
let upserted = 0;
for (const event of events) {
const dedupeHash = await computeDedupeHash(event);
const expiresAt = new Date(event.startAt.getTime() + 24 * 60 * 60 * 1000);
try {
await db
.insert(discoveredEvents)
.values({
sourceId,
externalId: event.externalId ?? null,
dedupeHash,
title: event.title,
description: event.description ?? null,
location: event.location ?? null,
lat: event.lat ?? null,
lon: event.lon ?? null,
startAt: event.startAt,
endAt: event.endAt ?? null,
allDay: event.allDay ?? false,
imageUrl: event.imageUrl ?? null,
sourceUrl: event.sourceUrl,
sourceName,
category: event.category ?? null,
priceInfo: event.priceInfo ?? null,
expiresAt,
})
.onConflictDoUpdate({
target: discoveredEvents.dedupeHash,
set: {
title: event.title,
description: event.description ?? null,
location: event.location ?? null,
startAt: event.startAt,
endAt: event.endAt ?? null,
sourceUrl: event.sourceUrl,
category: event.category ?? null,
priceInfo: event.priceInfo ?? null,
crawledAt: new Date(),
},
});
upserted++;
} catch (err) {
// Log but don't fail the whole batch for one bad event
console.error(`[discovery] failed to upsert event "${event.title}":`, err);
}
}
return upserted;
}
/** Process a single source: crawl, dedup, upsert, update status. */
async function processSource(
db: Database,
source: typeof discoverySources.$inferSelect,
config?: CrawlConfig
): Promise<void> {
const { events, error } = await crawlSource(source, config);
const now = new Date();
if (error) {
const newErrorCount = source.errorCount + 1;
await db
.update(discoverySources)
.set({
lastCrawledAt: now,
errorCount: newErrorCount,
lastError: error,
isActive: newErrorCount < MAX_ERROR_COUNT,
updatedAt: now,
})
.where(eq(discoverySources.id, source.id));
if (newErrorCount >= MAX_ERROR_COUNT) {
console.warn(
`[discovery] source "${source.name}" (${source.id}) deactivated after ${MAX_ERROR_COUNT} errors`
);
}
return;
}
const upserted = await upsertEvents(db, source.id, source.name, events);
await db
.update(discoverySources)
.set({
lastCrawledAt: now,
lastSuccessAt: now,
errorCount: 0,
lastError: null,
updatedAt: now,
})
.where(eq(discoverySources.id, source.id));
if (upserted > 0) {
console.log(`[discovery] crawled "${source.name}" — ${upserted} events upserted`);
}
}
/** Delete discovered events whose expiry has passed (past events). */
async function cleanupExpiredEvents(db: Database): Promise<number> {
const result = await db
.delete(discoveredEvents)
.where(lt(discoveredEvents.expiresAt, new Date()))
.returning({ id: discoveredEvents.id });
return result.length;
}
/** Run one tick of the crawl scheduler. */
export async function runCrawlTick(db: Database, config?: CrawlConfig): Promise<void> {
try {
const due = await getDueSources(db);
for (const source of due) {
await processSource(db, source, config);
}
const expired = await cleanupExpiredEvents(db);
if (expired > 0) {
console.log(`[discovery] cleaned up ${expired} expired events`);
}
} catch (err) {
console.error('[discovery] crawl tick failed:', err);
}
}
/**
* Start the periodic crawl scheduler. Returns a stop function.
* Default interval: 15 minutes.
*/
export function startCrawlScheduler(
db: Database,
config?: CrawlConfig,
intervalMs = 15 * 60 * 1000
): () => void {
const tick = () => runCrawlTick(db, config);
// First run shortly after boot
const bootTimer = setTimeout(tick, 10_000);
const intervalTimer = setInterval(tick, intervalMs);
return () => {
clearTimeout(bootTimer);
clearInterval(intervalTimer);
};
}
/**
* Crawl a single source immediately (triggered by user action).
* Returns the number of events upserted.
*/
export async function crawlSourceNow(
db: Database,
sourceId: string,
config?: CrawlConfig
): Promise<{ upserted: number; error?: string }> {
const sources = await db
.select()
.from(discoverySources)
.where(eq(discoverySources.id, sourceId))
.limit(1);
if (!sources[0]) return { upserted: 0, error: 'Source not found' };
const source = sources[0];
const { events, error } = await crawlSource(source, config);
const now = new Date();
if (error) {
await db
.update(discoverySources)
.set({
lastCrawledAt: now,
errorCount: source.errorCount + 1,
lastError: error,
updatedAt: now,
})
.where(eq(discoverySources.id, sourceId));
return { upserted: 0, error };
}
const upserted = await upsertEvents(db, sourceId, source.name, events);
await db
.update(discoverySources)
.set({
lastCrawledAt: now,
lastSuccessAt: now,
errorCount: 0,
lastError: null,
updatedAt: now,
})
.where(eq(discoverySources.id, sourceId));
return { upserted };
}

View file

@ -0,0 +1,27 @@
/**
* Deduplication for discovered events.
*
* Hash is based on normalized title + date + location so the same event
* from different sources (or re-crawls of the same source) collapses
* into a single row.
*/
import type { NormalizedEvent } from './types';
/**
* Compute a SHA-256 hex hash for deduplication.
* Key components: lowercased title + ISO date (no time) + lowercased location.
*/
export async function computeDedupeHash(event: NormalizedEvent): Promise<string> {
const title = event.title.toLowerCase().trim();
const date = event.startAt.toISOString().slice(0, 10); // YYYY-MM-DD
const location = (event.location ?? '').toLowerCase().trim();
const input = `${title}|${date}|${location}`;
const encoded = new TextEncoder().encode(input);
const hashBuffer = await crypto.subtle.digest('SHA-256', encoded);
const hashArray = new Uint8Array(hashBuffer);
return Array.from(hashArray)
.map((b) => b.toString(16).padStart(2, '0'))
.join('');
}

View file

@ -0,0 +1,96 @@
/**
* iCal (.ics) feed parser fetches a remote iCal URL and extracts
* VEVENT entries as NormalizedEvents.
*
* Uses node-ical for robust parsing of the many iCal quirks in the wild
* (timezone aliases, RRULE expansion, non-standard properties).
*/
import ical, { type VEvent } from 'node-ical';
import type { NormalizedEvent } from './types';
const FETCH_TIMEOUT_MS = 15_000;
/**
* Fetch and parse an iCal feed URL. Returns future events only.
* Handles VTIMEZONE, DTSTART/DTEND with and without timezone, RRULE.
*/
export async function parseIcalFeed(url: string, sourceName: string): Promise<NormalizedEvent[]> {
const controller = new AbortController();
const timeout = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS);
try {
const res = await fetch(url, {
signal: controller.signal,
headers: { 'User-Agent': 'Mana-Events-Discovery/1.0' },
});
if (!res.ok) {
throw new Error(`HTTP ${res.status} fetching ${url}`);
}
const text = await res.text();
return parseIcalText(text, url, sourceName);
} finally {
clearTimeout(timeout);
}
}
/**
* Parse raw iCal text into NormalizedEvents.
* Exported for testing without network.
*/
export function parseIcalText(
icalText: string,
sourceUrl: string,
sourceName: string
): NormalizedEvent[] {
const parsed = ical.sync.parseICS(icalText);
const now = new Date();
const events: NormalizedEvent[] = [];
for (const component of Object.values(parsed)) {
if (!component || component.type !== 'VEVENT') continue;
const vevent = component as VEvent;
const startDate = vevent.start ? new Date(vevent.start as unknown as string) : null;
if (!startDate || isNaN(startDate.getTime())) continue;
// Skip past events (allow 1 day grace for ongoing events)
const endDate = vevent.end ? new Date(vevent.end as unknown as string) : null;
const cutoff = endDate ?? startDate;
if (cutoff.getTime() < now.getTime() - 24 * 60 * 60 * 1000) continue;
const summary = typeof vevent.summary === 'string' ? vevent.summary.trim() : '';
if (!summary) continue;
const description = typeof vevent.description === 'string' ? vevent.description.trim() : null;
const location = typeof vevent.location === 'string' ? vevent.location.trim() : null;
// Detect all-day: either DTSTART is DATE (no time), or duration spans full days
const allDay =
vevent.datetype === 'date' ||
(vevent.start &&
typeof vevent.start === 'object' &&
'dateOnly' in vevent.start &&
(vevent.start as { dateOnly?: boolean }).dateOnly === true);
// Use the VEVENT UID as external ID for dedup stability across re-crawls
const uid = typeof vevent.uid === 'string' ? vevent.uid : null;
// Extract URL if present
const eventUrl = typeof vevent.url === 'string' ? vevent.url : sourceUrl;
events.push({
title: summary,
description: description ? description.slice(0, 2000) : null,
location,
startAt: startDate,
endAt: endDate,
allDay: !!allDay,
sourceUrl: eventUrl,
externalId: uid,
category: null, // iCal doesn't have a standard category we can rely on
});
}
return events;
}

View file

@ -0,0 +1,80 @@
/**
* Relevance Scorer ranks discovered events for the user's feed.
*
* Scoring dimensions:
* - Category match with user interests (+20 per match, weighted)
* - Freetext match in title (+15 per interest freetext match)
* - Distance to nearest region center (-1 per km beyond 5km)
* - Time proximity (+10 if within 7 days, +5 if within 14 days)
* - Weekend bonus (+5 for Sat/Sun events)
*
* Score range: 0100, clamped.
*/
export interface ScoredEventInput {
category: string | null;
title: string;
lat: number | null;
lon: number | null;
startAt: Date;
}
export interface ScoringContext {
interests: Array<{ category: string; freetext: string | null; weight: number }>;
regions: Array<{ lat: number; lon: number; radiusKm: number }>;
}
/**
* Compute a relevance score for a discovered event.
* Returns 0100, higher = more relevant.
*/
export function scoreEvent(event: ScoredEventInput, ctx: ScoringContext): number {
let score = 50; // Base score
// ── Category match ──────────────────────────────────────────
for (const interest of ctx.interests) {
if (event.category && event.category === interest.category) {
score += 20 * interest.weight;
}
if (interest.freetext && event.title.toLowerCase().includes(interest.freetext.toLowerCase())) {
score += 15 * interest.weight;
}
}
// ── Distance ────────────────────────────────────────────────
if (event.lat != null && event.lon != null && ctx.regions.length > 0) {
const nearest = Math.min(
...ctx.regions.map((r) => haversineKm(event.lat!, event.lon!, r.lat, r.lon))
);
// Penalty: -1 per km beyond 5km
score -= Math.max(0, nearest - 5);
}
// ── Time proximity ──────────────────────────────────────────
const daysUntil = (event.startAt.getTime() - Date.now()) / 86_400_000;
if (daysUntil >= 0 && daysUntil <= 7) score += 10;
else if (daysUntil > 7 && daysUntil <= 14) score += 5;
// ── Weekend bonus ───────────────────────────────────────────
const dow = event.startAt.getDay();
if (dow === 0 || dow === 6) score += 5;
return Math.max(0, Math.min(100, Math.round(score)));
}
/**
* Haversine distance in km between two lat/lon points.
*/
function haversineKm(lat1: number, lon1: number, lat2: number, lon2: number): number {
const R = 6371;
const dLat = toRad(lat2 - lat1);
const dLon = toRad(lon2 - lon1);
const a =
Math.sin(dLat / 2) ** 2 +
Math.cos(toRad(lat1)) * Math.cos(toRad(lat2)) * Math.sin(dLon / 2) ** 2;
return R * 2 * Math.atan2(Math.sqrt(a), Math.sqrt(1 - a));
}
function toRad(deg: number): number {
return (deg * Math.PI) / 180;
}

View file

@ -0,0 +1,187 @@
/**
* Source Discoverer automatically finds event sources for a region.
*
* Given a region (e.g. "Freiburg"), searches the web via mana-research
* for iCal feeds and venue websites, then inserts them as suggested
* sources the user can activate or reject.
*
* Pipeline:
* 1. Build search queries from region label
* 2. Search via mana-research POST /api/v1/search
* 3. Classify results: .ics URLs 'ical', venue/event pages 'website'
* 4. Insert as discovery_sources with is_active=false (suggested)
*/
import { eq, and } from 'drizzle-orm';
import type { Database } from '../db/connection';
import { discoverySources, discoveryRegions } from '../db/schema/discovery';
const SEARCH_TIMEOUT_MS = 15_000;
/** Patterns that indicate an iCal feed URL. */
const ICAL_PATTERNS = [/\.ics$/i, /\.ical$/i, /webcal:\/\//i, /format=ical/i, /export.*ical/i];
/** Patterns that indicate an event/venue page worth crawling. */
const EVENT_PAGE_PATTERNS = [
/veranstaltung/i,
/kalender/i,
/programm/i,
/events?\b/i,
/termine/i,
/konzert/i,
/festival/i,
/theater/i,
/what.?s.?on/i,
/schedule/i,
/agenda/i,
];
/** Search queries to discover event sources for a region. */
function buildSearchQueries(regionLabel: string): string[] {
return [
`${regionLabel} Veranstaltungskalender`,
`${regionLabel} Events Termine`,
`${regionLabel} Kulturzentrum Programm`,
`${regionLabel} Konzerte Theater Termine`,
`${regionLabel} Vereine Veranstaltungen`,
];
}
interface SearchHit {
url: string;
title: string;
snippet?: string;
}
interface SearchResponse {
success: boolean;
data?: {
results: SearchHit[];
};
}
/** Classify a URL as ical, website, or null (not relevant). */
function classifyUrl(url: string, title: string, snippet?: string): 'ical' | 'website' | null {
// Check for iCal feed
if (ICAL_PATTERNS.some((p) => p.test(url))) return 'ical';
// Check for event/venue page
const text = `${url} ${title} ${snippet ?? ''}`;
if (EVENT_PAGE_PATTERNS.some((p) => p.test(text))) return 'website';
return null;
}
/** Extract a human-readable name from a URL + title. */
function extractSourceName(url: string, title: string): string {
// Prefer the page title, trimmed to something reasonable
if (title) {
// Strip common suffixes
const cleaned = title
.replace(/\s*[-|–—]\s*(Startseite|Home|Events?|Veranstaltungen|Termine|Programm).*$/i, '')
.trim();
if (cleaned.length > 3 && cleaned.length < 100) return cleaned;
}
// Fallback: hostname
try {
return new URL(url).hostname.replace(/^www\./, '');
} catch {
return url.slice(0, 80);
}
}
export interface DiscoverResult {
suggestedCount: number;
queries: number;
searchResults: number;
}
/**
* Discover event sources for a region by searching the web.
*
* Inserts found sources with is_active=false so the user can review
* and activate them. Skips URLs that already exist as sources.
*/
export async function discoverSourcesForRegion(
db: Database,
regionId: string,
userId: string,
manaResearchUrl: string
): Promise<DiscoverResult> {
// Get the region
const [region] = await db
.select()
.from(discoveryRegions)
.where(and(eq(discoveryRegions.id, regionId), eq(discoveryRegions.userId, userId)))
.limit(1);
if (!region) throw new Error('Region not found');
// Get existing source URLs to avoid duplicates
const existingSources = await db
.select({ url: discoverySources.url })
.from(discoverySources)
.where(eq(discoverySources.userId, userId));
const existingUrls = new Set(existingSources.map((s) => s.url).filter(Boolean));
const queries = buildSearchQueries(region.label);
let totalResults = 0;
let suggestedCount = 0;
// Run searches in parallel (but limit to avoid hammering the service)
const searchResults = await Promise.all(
queries.map((query) => searchWeb(manaResearchUrl, query))
);
for (const result of searchResults) {
if (!result?.data?.results) continue;
for (const hit of result.data.results) {
totalResults++;
if (existingUrls.has(hit.url)) continue;
const type = classifyUrl(hit.url, hit.title, hit.snippet);
if (!type) continue;
const name = extractSourceName(hit.url, hit.title);
try {
await db.insert(discoverySources).values({
userId,
type,
url: hit.url,
name,
regionId,
isActive: false, // suggested — user must activate
crawlIntervalHours: type === 'ical' ? 24 : 48,
});
existingUrls.add(hit.url);
suggestedCount++;
} catch {
// Ignore dupes from parallel queries
}
}
}
return { suggestedCount, queries: queries.length, searchResults: totalResults };
}
/** Search the web via mana-research. Gracefully returns null on failure. */
async function searchWeb(manaResearchUrl: string, query: string): Promise<SearchResponse | null> {
try {
const res = await fetch(`${manaResearchUrl}/api/v1/search`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ query }),
signal: AbortSignal.timeout(SEARCH_TIMEOUT_MS),
});
if (!res.ok) {
console.warn(`[source-discoverer] search failed ${res.status}: ${query}`);
return null;
}
return (await res.json()) as SearchResponse;
} catch (err) {
console.warn(`[source-discoverer] search error for "${query}":`, err);
return null;
}
}

View file

@ -0,0 +1,50 @@
/**
* Discovery domain types shared across parser, scheduler, and routes.
*/
/** A normalized event extracted from any source type. */
export interface NormalizedEvent {
title: string;
description?: string | null;
location?: string | null;
lat?: number | null;
lon?: number | null;
startAt: Date;
endAt?: Date | null;
allDay?: boolean;
imageUrl?: string | null;
sourceUrl: string;
category?: string | null;
priceInfo?: string | null;
externalId?: string | null;
}
/** Result of crawling a single source. */
export interface CrawlResult {
sourceId: string;
sourceName: string;
events: NormalizedEvent[];
error?: string;
}
/** Source types supported by the crawler. */
export type SourceType = 'ical' | 'website' | 'api' | 'search_query';
/** Event categories for filtering and scoring. */
export const EVENT_CATEGORIES = [
'music',
'theater',
'art',
'tech',
'sport',
'food',
'family',
'nature',
'education',
'community',
'nightlife',
'market',
'other',
] as const;
export type EventCategory = (typeof EVENT_CATEGORIES)[number];

View file

@ -0,0 +1,236 @@
/**
* Website Extractor LLM-based event extraction from unstructured web pages.
*
* Pipeline:
* 1. Crawl the page via mana-research POST /api/v1/extract
* 2. Feed the extracted text to mana-llm with a structured output prompt
* 3. Parse the LLM response as NormalizedEvent[]
*
* Uses cheap/fast models (gemma3:4b or haiku) to keep costs low.
* Falls back gracefully on any failure one bad page doesn't crash the batch.
*/
import type { NormalizedEvent } from './types';
const EXTRACT_TIMEOUT_MS = 20_000;
const LLM_TIMEOUT_MS = 30_000;
const MAX_CONTENT_CHARS = 15_000; // Trim long pages to stay within context window
interface ExtractResponse {
success: boolean;
data?: {
content: {
title?: string;
text?: string;
markdown?: string;
html?: string;
};
};
}
interface ChatCompletionResponse {
choices: Array<{
message: {
content: string;
};
}>;
}
/**
* Extract events from a website URL.
*
* 1. Fetches + renders the page via mana-research (Firecrawl/Jina/Readability)
* 2. Sends the text to mana-llm with a structured extraction prompt
* 3. Parses JSON output into NormalizedEvent[]
*/
export async function extractEventsFromWebsite(
url: string,
sourceName: string,
manaResearchUrl: string,
manaLlmUrl: string
): Promise<NormalizedEvent[]> {
// Step 1: Extract page content
const content = await fetchPageContent(url, manaResearchUrl);
if (!content) return [];
// Step 2: LLM extraction
const events = await llmExtractEvents(content, url, sourceName, manaLlmUrl);
return events;
}
/** Fetch and extract text content from a URL via mana-research. */
async function fetchPageContent(url: string, manaResearchUrl: string): Promise<string | null> {
try {
const res = await fetch(`${manaResearchUrl}/api/v1/extract`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ url }),
signal: AbortSignal.timeout(EXTRACT_TIMEOUT_MS),
});
if (!res.ok) {
console.warn(`[website-extractor] extract failed ${res.status}: ${url}`);
return null;
}
const data = (await res.json()) as ExtractResponse;
if (!data.success || !data.data?.content) return null;
// Prefer markdown > text > html
const text = data.data.content.markdown || data.data.content.text || '';
if (text.length < 50) return null; // Too short to contain events
// Trim to stay within LLM context window
return text.slice(0, MAX_CONTENT_CHARS);
} catch (err) {
console.warn(`[website-extractor] fetch error for ${url}:`, err);
return null;
}
}
/** Build the LLM system prompt for event extraction. */
function buildExtractionPrompt(): string {
const today = new Date().toISOString().slice(0, 10);
return `Du bist ein Event-Extractor. Extrahiere ALLE kommenden Veranstaltungen von der gegebenen Webseite.
Pro Event liefere:
- title (string, Pflicht) Name der Veranstaltung
- date (string, Pflicht) Startdatum im Format YYYY-MM-DD
- time (string, optional) Startzeit im Format HH:MM
- endDate (string, optional) Enddatum falls mehrtägig
- endTime (string, optional) Endzeit
- location (string, optional) Veranstaltungsort / Adresse
- description (string, optional) Kurzbeschreibung, max 300 Zeichen
- category (string, optional) Eine von: music, theater, art, tech, sport, food, family, nature, education, community, nightlife, market, other
- priceInfo (string, optional) Preis, z.B. "Eintritt frei", "15 EUR", "VVK 12 / AK 15"
Heutiges Datum: ${today}
Ignoriere vergangene Events (vor ${today}).
Antwort als JSON-Objekt mit einem "events"-Array. Kein Markdown, nur JSON.`;
}
/** Send page content to mana-llm for structured event extraction. */
async function llmExtractEvents(
pageContent: string,
sourceUrl: string,
sourceName: string,
manaLlmUrl: string
): Promise<NormalizedEvent[]> {
try {
const res = await fetch(`${manaLlmUrl}/v1/chat/completions`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
model: 'ollama/gemma3:4b',
messages: [
{ role: 'system', content: buildExtractionPrompt() },
{ role: 'user', content: `Extrahiere Events von dieser Seite:\n\n${pageContent}` },
],
max_tokens: 2048,
temperature: 0,
response_format: { type: 'json_object' },
}),
signal: AbortSignal.timeout(LLM_TIMEOUT_MS),
});
if (!res.ok) {
console.warn(`[website-extractor] LLM failed ${res.status}`);
return [];
}
const completion = (await res.json()) as ChatCompletionResponse;
const rawJson = completion.choices?.[0]?.message?.content ?? '';
return parseExtractedEvents(rawJson, sourceUrl, sourceName);
} catch (err) {
console.warn(`[website-extractor] LLM error:`, err);
return [];
}
}
/** Parse and validate LLM JSON output into NormalizedEvents. */
export function parseExtractedEvents(
rawJson: string,
sourceUrl: string,
sourceName: string
): NormalizedEvent[] {
try {
// Strip markdown fences if present
const cleaned = rawJson.replace(/^```(?:json)?\s*\n?/m, '').replace(/\n?```\s*$/m, '');
const parsed = JSON.parse(cleaned);
const rawEvents = parsed.events ?? parsed;
if (!Array.isArray(rawEvents)) return [];
const now = new Date();
const events: NormalizedEvent[] = [];
for (const raw of rawEvents) {
if (!raw.title || !raw.date) continue;
// Parse date — LLMs sometimes return "25. April 2026" instead of ISO
const startAt = parseFlexibleDate(raw.date, raw.time);
if (!startAt || isNaN(startAt.getTime())) continue;
// Skip past events
if (startAt.getTime() < now.getTime() - 24 * 60 * 60 * 1000) continue;
const endAt = raw.endDate ? parseFlexibleDate(raw.endDate, raw.endTime) : null;
events.push({
title: String(raw.title).trim().slice(0, 200),
description: raw.description ? String(raw.description).trim().slice(0, 2000) : null,
location: raw.location ? String(raw.location).trim() : null,
startAt,
endAt,
allDay: !raw.time,
sourceUrl,
category: raw.category ?? null,
priceInfo: raw.priceInfo ? String(raw.priceInfo).trim() : null,
});
}
return events;
} catch (err) {
console.warn(`[website-extractor] JSON parse error:`, err);
return [];
}
}
/** Parse dates flexibly — handles ISO, German formats, and partial dates. */
function parseFlexibleDate(dateStr: string, timeStr?: string): Date | null {
if (!dateStr) return null;
// Try ISO format first (YYYY-MM-DD)
const isoMatch = dateStr.match(/^(\d{4})-(\d{2})-(\d{2})/);
if (isoMatch) {
const [, y, m, d] = isoMatch;
const time = parseTime(timeStr);
return new Date(`${y}-${m}-${d}T${time}:00`);
}
// Try German format (DD.MM.YYYY)
const deMatch = dateStr.match(/(\d{1,2})\.(\d{1,2})\.(\d{4})/);
if (deMatch) {
const [, d, m, y] = deMatch;
const time = parseTime(timeStr);
return new Date(`${y}-${m!.padStart(2, '0')}-${d!.padStart(2, '0')}T${time}:00`);
}
// Fallback: let Date parse it
try {
const d = new Date(dateStr);
if (!isNaN(d.getTime())) return d;
} catch {
// ignore
}
return null;
}
function parseTime(timeStr?: string): string {
if (!timeStr) return '00:00';
const match = timeStr.match(/(\d{1,2}):(\d{2})/);
if (match) return `${match[1]!.padStart(2, '0')}:${match[2]}`;
return '00:00';
}

View file

@ -14,6 +14,7 @@ import { createApp } from './app';
import { loadConfig } from './config';
import { getDb } from './db/connection';
import { startRateBucketSweeper } from './lib/cleanup';
import { startCrawlScheduler } from './discovery/crawl-scheduler';
const config = loadConfig();
const db = getDb(config.databaseUrl);
@ -22,6 +23,12 @@ const db = getDb(config.databaseUrl);
// accumulate for the lifetime of long-published events.
startRateBucketSweeper(db);
// Event discovery — crawl sources (iCal feeds, websites) every 15 minutes.
startCrawlScheduler(db, {
manaResearchUrl: config.manaResearchUrl,
manaLlmUrl: config.manaLlmUrl,
});
console.log(`mana-events starting on port ${config.port}...`);
export default {

View file

@ -0,0 +1,136 @@
/**
* Discovery feed routes paginated event feed + user actions.
*
* The feed endpoint returns discovered events sorted by start date,
* filtered by date range and optional category, enriched with the
* user's action status (saved/dismissed/null).
*/
import { Hono } from 'hono';
import { z } from 'zod';
import { and, eq, gte, lte, sql, isNull, desc, asc } from 'drizzle-orm';
import type { Database } from '../db/connection';
import { discoveredEvents, discoveryUserActions, discoverySources } from '../db/schema/discovery';
import { BadRequestError } from '../lib/errors';
import type { AuthUser } from '../middleware/jwt-auth';
const feedQuerySchema = z.object({
from: z.string().datetime().optional(),
to: z.string().datetime().optional(),
category: z.string().max(50).optional(),
limit: z.coerce.number().int().min(1).max(100).optional(),
offset: z.coerce.number().int().min(0).optional(),
hideDismissed: z.coerce.boolean().optional(),
});
const actionSchema = z.object({
action: z.enum(['save', 'dismiss']),
});
export function createDiscoveryFeedRoutes(db: Database) {
const app = new Hono<{ Variables: { user: AuthUser } }>();
/**
* GET /feed paginated discovered events.
*
* Joins with user actions to include save/dismiss status.
* Only shows events from the current user's sources.
*/
app.get('/feed', async (c) => {
const user = c.get('user');
const query = feedQuerySchema.safeParse(c.req.query());
if (!query.success) throw new BadRequestError(query.error.issues[0]?.message ?? 'Invalid');
const { from, to, category, hideDismissed } = query.data;
const limit = query.data.limit ?? 20;
const offset = query.data.offset ?? 0;
// Build WHERE conditions
const conditions = [
// Only events from this user's sources
eq(discoverySources.userId, user.userId),
eq(discoverySources.isActive, true),
// Only future events (or today)
gte(discoveredEvents.startAt, from ? new Date(from) : new Date()),
];
if (to) {
conditions.push(lte(discoveredEvents.startAt, new Date(to)));
}
if (category) {
conditions.push(eq(discoveredEvents.category, category));
}
// Query events with left join on user actions
const rows = await db
.select({
id: discoveredEvents.id,
title: discoveredEvents.title,
description: discoveredEvents.description,
location: discoveredEvents.location,
lat: discoveredEvents.lat,
lon: discoveredEvents.lon,
startAt: discoveredEvents.startAt,
endAt: discoveredEvents.endAt,
allDay: discoveredEvents.allDay,
imageUrl: discoveredEvents.imageUrl,
sourceUrl: discoveredEvents.sourceUrl,
sourceName: discoveredEvents.sourceName,
category: discoveredEvents.category,
priceInfo: discoveredEvents.priceInfo,
crawledAt: discoveredEvents.crawledAt,
userAction: discoveryUserActions.action,
})
.from(discoveredEvents)
.innerJoin(discoverySources, eq(discoveredEvents.sourceId, discoverySources.id))
.leftJoin(
discoveryUserActions,
and(
eq(discoveryUserActions.eventId, discoveredEvents.id),
eq(discoveryUserActions.userId, user.userId)
)
)
.where(and(...conditions))
.orderBy(asc(discoveredEvents.startAt))
.limit(limit + 1) // fetch one extra to determine hasMore
.offset(offset);
// Filter dismissed events client-side if requested
const filtered = hideDismissed ? rows.filter((r) => r.userAction !== 'dismiss') : rows;
const hasMore = filtered.length > limit;
const events = filtered.slice(0, limit);
return c.json({ events, total: events.length, hasMore });
});
/**
* POST /feed/:eventId/action save or dismiss a discovered event.
*/
app.post('/feed/:eventId/action', async (c) => {
const user = c.get('user');
const eventId = c.req.param('eventId');
const body = await c.req.json().catch(() => null);
const parsed = actionSchema.safeParse(body);
if (!parsed.success) throw new BadRequestError(parsed.error.issues[0]?.message ?? 'Invalid');
await db
.insert(discoveryUserActions)
.values({
userId: user.userId,
eventId,
action: parsed.data.action,
})
.onConflictDoUpdate({
target: [discoveryUserActions.userId, discoveryUserActions.eventId],
set: {
action: parsed.data.action,
actedAt: new Date(),
},
});
return c.json({ ok: true });
});
return app;
}

View file

@ -0,0 +1,299 @@
/**
* Discovery CRUD routes JWT-authenticated.
*
* Manages regions, interests, and sources for a user's event discovery setup.
*/
import { Hono } from 'hono';
import { z } from 'zod';
import { and, eq } from 'drizzle-orm';
import type { Database } from '../db/connection';
import { discoveryRegions, discoveryInterests, discoverySources } from '../db/schema/discovery';
import { EVENT_CATEGORIES } from '../discovery/types';
import { crawlSourceNow } from '../discovery/crawl-scheduler';
import { discoverSourcesForRegion } from '../discovery/source-discoverer';
import { BadRequestError, ForbiddenError, NotFoundError } from '../lib/errors';
import type { AuthUser } from '../middleware/jwt-auth';
import type { Config } from '../config';
// ─── Validation schemas ─────────────────────────────────────────────
const regionCreateSchema = z.object({
label: z.string().min(1).max(200),
lat: z.number().min(-90).max(90),
lon: z.number().min(-180).max(180),
radiusKm: z.number().int().min(1).max(200).optional(),
});
const regionUpdateSchema = z.object({
label: z.string().min(1).max(200).optional(),
radiusKm: z.number().int().min(1).max(200).optional(),
isActive: z.boolean().optional(),
});
const interestCreateSchema = z.object({
category: z.string().min(1).max(50),
freetext: z.string().max(200).nullable().optional(),
weight: z.number().min(0.1).max(5).optional(),
});
const sourceCreateSchema = z.object({
type: z.enum(['ical', 'website']),
url: z.string().url().max(2000),
name: z.string().min(1).max(200),
regionId: z.string().uuid(),
crawlIntervalHours: z.number().int().min(1).max(168).optional(), // max 7 days
});
// ─── Routes ─────────────────────────────────────────────────────────
export function createDiscoveryRoutes(db: Database, config?: Config) {
const app = new Hono<{ Variables: { user: AuthUser } }>();
// ── Regions ──────────────────────────────────────────────────
app.get('/regions', async (c) => {
const user = c.get('user');
const regions = await db
.select()
.from(discoveryRegions)
.where(eq(discoveryRegions.userId, user.userId));
return c.json({ regions });
});
app.post('/regions', async (c) => {
const user = c.get('user');
const body = await c.req.json().catch(() => null);
const parsed = regionCreateSchema.safeParse(body);
if (!parsed.success) throw new BadRequestError(parsed.error.issues[0]?.message ?? 'Invalid');
const [region] = await db
.insert(discoveryRegions)
.values({
userId: user.userId,
label: parsed.data.label,
lat: parsed.data.lat,
lon: parsed.data.lon,
radiusKm: parsed.data.radiusKm ?? 25,
})
.returning();
return c.json({ region }, 201);
});
app.put('/regions/:id', async (c) => {
const user = c.get('user');
const id = c.req.param('id');
const body = await c.req.json().catch(() => null);
const parsed = regionUpdateSchema.safeParse(body);
if (!parsed.success) throw new BadRequestError(parsed.error.issues[0]?.message ?? 'Invalid');
const existing = await db
.select()
.from(discoveryRegions)
.where(and(eq(discoveryRegions.id, id), eq(discoveryRegions.userId, user.userId)))
.limit(1);
if (!existing[0]) throw new NotFoundError('Region not found');
const updates: Partial<typeof discoveryRegions.$inferInsert> = {};
if (parsed.data.label !== undefined) updates.label = parsed.data.label;
if (parsed.data.radiusKm !== undefined) updates.radiusKm = parsed.data.radiusKm;
if (parsed.data.isActive !== undefined) updates.isActive = parsed.data.isActive;
const [region] = await db
.update(discoveryRegions)
.set(updates)
.where(eq(discoveryRegions.id, id))
.returning();
return c.json({ region });
});
app.delete('/regions/:id', async (c) => {
const user = c.get('user');
const id = c.req.param('id');
const existing = await db
.select()
.from(discoveryRegions)
.where(and(eq(discoveryRegions.id, id), eq(discoveryRegions.userId, user.userId)))
.limit(1);
if (!existing[0]) throw new NotFoundError('Region not found');
await db.delete(discoveryRegions).where(eq(discoveryRegions.id, id));
return c.json({ deleted: true });
});
// ── Interests ────────────────────────────────────────────────
app.get('/interests', async (c) => {
const user = c.get('user');
const interests = await db
.select()
.from(discoveryInterests)
.where(eq(discoveryInterests.userId, user.userId));
return c.json({ interests });
});
app.post('/interests', async (c) => {
const user = c.get('user');
const body = await c.req.json().catch(() => null);
const parsed = interestCreateSchema.safeParse(body);
if (!parsed.success) throw new BadRequestError(parsed.error.issues[0]?.message ?? 'Invalid');
const [interest] = await db
.insert(discoveryInterests)
.values({
userId: user.userId,
category: parsed.data.category,
freetext: parsed.data.freetext ?? null,
weight: parsed.data.weight ?? 1.0,
})
.returning();
return c.json({ interest }, 201);
});
app.delete('/interests/:id', async (c) => {
const user = c.get('user');
const id = c.req.param('id');
const existing = await db
.select()
.from(discoveryInterests)
.where(and(eq(discoveryInterests.id, id), eq(discoveryInterests.userId, user.userId)))
.limit(1);
if (!existing[0]) throw new NotFoundError('Interest not found');
await db.delete(discoveryInterests).where(eq(discoveryInterests.id, id));
return c.json({ deleted: true });
});
// ── Sources ──────────────────────────────────────────────────
app.get('/sources', async (c) => {
const user = c.get('user');
const sources = await db
.select()
.from(discoverySources)
.where(eq(discoverySources.userId, user.userId));
return c.json({ sources });
});
app.post('/sources', async (c) => {
const user = c.get('user');
const body = await c.req.json().catch(() => null);
const parsed = sourceCreateSchema.safeParse(body);
if (!parsed.success) throw new BadRequestError(parsed.error.issues[0]?.message ?? 'Invalid');
// Verify the region belongs to this user
const region = await db
.select()
.from(discoveryRegions)
.where(
and(eq(discoveryRegions.id, parsed.data.regionId), eq(discoveryRegions.userId, user.userId))
)
.limit(1);
if (!region[0]) throw new BadRequestError('Region not found');
const [source] = await db
.insert(discoverySources)
.values({
userId: user.userId,
type: parsed.data.type,
url: parsed.data.url,
name: parsed.data.name,
regionId: parsed.data.regionId,
crawlIntervalHours: parsed.data.crawlIntervalHours ?? 24,
})
.returning();
return c.json({ source }, 201);
});
app.delete('/sources/:id', async (c) => {
const user = c.get('user');
const id = c.req.param('id');
const existing = await db
.select()
.from(discoverySources)
.where(and(eq(discoverySources.id, id), eq(discoverySources.userId, user.userId)))
.limit(1);
if (!existing[0]) throw new NotFoundError('Source not found');
await db.delete(discoverySources).where(eq(discoverySources.id, id));
return c.json({ deleted: true });
});
// Trigger an immediate crawl for a source
app.post('/sources/:id/crawl', async (c) => {
const user = c.get('user');
const id = c.req.param('id');
const existing = await db
.select()
.from(discoverySources)
.where(and(eq(discoverySources.id, id), eq(discoverySources.userId, user.userId)))
.limit(1);
if (!existing[0]) throw new NotFoundError('Source not found');
const crawlConfig = config
? { manaResearchUrl: config.manaResearchUrl, manaLlmUrl: config.manaLlmUrl }
: undefined;
const result = await crawlSourceNow(db, id, crawlConfig);
return c.json(result);
});
// ── Source Discovery (Phase 2) ───────────────────────────────
// Auto-discover event sources for a region via web search
app.post('/regions/:id/discover-sources', async (c) => {
const user = c.get('user');
const regionId = c.req.param('id');
if (!config) throw new BadRequestError('Source discovery not configured');
const result = await discoverSourcesForRegion(
db,
regionId,
user.userId,
config.manaResearchUrl
);
return c.json(result);
});
// Activate a suggested source
app.put('/sources/:id/activate', async (c) => {
const user = c.get('user');
const id = c.req.param('id');
const existing = await db
.select()
.from(discoverySources)
.where(and(eq(discoverySources.id, id), eq(discoverySources.userId, user.userId)))
.limit(1);
if (!existing[0]) throw new NotFoundError('Source not found');
const [source] = await db
.update(discoverySources)
.set({ isActive: true, updatedAt: new Date() })
.where(eq(discoverySources.id, id))
.returning();
// Trigger immediate crawl for the newly activated source
const crawlConfig = config
? { manaResearchUrl: config.manaResearchUrl, manaLlmUrl: config.manaLlmUrl }
: undefined;
crawlSourceNow(db, id, crawlConfig).catch(() => {});
return c.json({ source });
});
// Reject a suggested source
app.delete('/sources/:id/reject', async (c) => {
const user = c.get('user');
const id = c.req.param('id');
const existing = await db
.select()
.from(discoverySources)
.where(and(eq(discoverySources.id, id), eq(discoverySources.userId, user.userId)))
.limit(1);
if (!existing[0]) throw new NotFoundError('Source not found');
await db.delete(discoverySources).where(eq(discoverySources.id, id));
return c.json({ deleted: true });
});
return app;
}