mirror of
https://github.com/Memo-2023/mana-monorepo.git
synced 2026-05-20 07:23:37 +02:00
feat(events): add Event Discovery — Phase 1 + 2
Phase 1: Manual iCal feeds + Discovery tab - 5 new DB tables in event_discovery schema (regions, interests, sources, discovered_events, user_actions) - iCal parser (node-ical) with deduplication (SHA-256 hash) - Crawl scheduler (15-min interval, auto-deactivate after 5 errors) - CRUD routes for regions, interests, sources + paginated feed endpoint - Frontend: "Meine Events" / "Entdecken" tab navigation in ListView - Discovery setup wizard (regions via mana-geocoding + interests) - DiscoveredEventCard with save/dismiss, SourceManager for iCal feeds - "Merken" creates a local socialEvent from discovered event Phase 2: Auto source discovery + LLM extraction + relevance scoring - Source discoverer: web search via mana-research to auto-find iCal feeds and venue websites for a region - Website extractor: crawl via mana-research /extract, then LLM-based event extraction via mana-llm with structured JSON output - Flexible date parsing (ISO, DD.MM.YYYY), markdown fence stripping - Relevance scorer: category match, freetext match, haversine distance, time proximity, weekend bonus (0-100 clamped) - Routes: POST regions/:id/discover-sources, PUT/DELETE sources/:id/activate|reject - Frontend: "Automatisch finden" button, suggested vs active sources UI 107 tests (all passing), no regressions. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
677123091a
commit
b5d55fdb21
34 changed files with 5105 additions and 45 deletions
|
|
@ -7,5 +7,5 @@ export default defineConfig({
|
|||
dbCredentials: {
|
||||
url: process.env.DATABASE_URL || 'postgresql://mana:devpassword@localhost:5432/mana_platform',
|
||||
},
|
||||
schemaFilter: ['events'],
|
||||
schemaFilter: ['events', 'event_discovery'],
|
||||
});
|
||||
|
|
|
|||
|
|
@ -12,10 +12,11 @@
|
|||
"db:studio": "drizzle-kit studio"
|
||||
},
|
||||
"dependencies": {
|
||||
"hono": "^4.7.0",
|
||||
"drizzle-orm": "^0.38.3",
|
||||
"postgres": "^3.4.5",
|
||||
"hono": "^4.7.0",
|
||||
"jose": "^6.1.2",
|
||||
"node-ical": "^0.26.0",
|
||||
"postgres": "^3.4.5",
|
||||
"zod": "^3.24.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
|
|
|
|||
80
services/mana-events/src/__tests__/deduplicator.test.ts
Normal file
80
services/mana-events/src/__tests__/deduplicator.test.ts
Normal file
|
|
@ -0,0 +1,80 @@
|
|||
/**
|
||||
* Deduplicator unit tests — no DB required.
|
||||
*/
|
||||
|
||||
import { describe, it, expect } from 'bun:test';
|
||||
import { computeDedupeHash } from '../discovery/deduplicator';
|
||||
import type { NormalizedEvent } from '../discovery/types';
|
||||
|
||||
function makeEvent(overrides: Partial<NormalizedEvent> = {}): NormalizedEvent {
|
||||
return {
|
||||
title: 'Jazz Night',
|
||||
startAt: new Date('2026-05-01T19:00:00Z'),
|
||||
sourceUrl: 'https://example.com/event',
|
||||
location: 'Jazzhaus Freiburg',
|
||||
...overrides,
|
||||
};
|
||||
}
|
||||
|
||||
describe('computeDedupeHash', () => {
|
||||
it('produces a hex string', async () => {
|
||||
const hash = await computeDedupeHash(makeEvent());
|
||||
expect(hash).toMatch(/^[0-9a-f]{64}$/);
|
||||
});
|
||||
|
||||
it('is deterministic (same input = same hash)', async () => {
|
||||
const a = await computeDedupeHash(makeEvent());
|
||||
const b = await computeDedupeHash(makeEvent());
|
||||
expect(a).toBe(b);
|
||||
});
|
||||
|
||||
it('differs when title changes', async () => {
|
||||
const a = await computeDedupeHash(makeEvent({ title: 'Jazz Night' }));
|
||||
const b = await computeDedupeHash(makeEvent({ title: 'Rock Night' }));
|
||||
expect(a).not.toBe(b);
|
||||
});
|
||||
|
||||
it('differs when date changes', async () => {
|
||||
const a = await computeDedupeHash(makeEvent({ startAt: new Date('2026-05-01T19:00:00Z') }));
|
||||
const b = await computeDedupeHash(makeEvent({ startAt: new Date('2026-05-02T19:00:00Z') }));
|
||||
expect(a).not.toBe(b);
|
||||
});
|
||||
|
||||
it('differs when location changes', async () => {
|
||||
const a = await computeDedupeHash(makeEvent({ location: 'Jazzhaus Freiburg' }));
|
||||
const b = await computeDedupeHash(makeEvent({ location: 'E-Werk Freiburg' }));
|
||||
expect(a).not.toBe(b);
|
||||
});
|
||||
|
||||
it('is case-insensitive (title)', async () => {
|
||||
const a = await computeDedupeHash(makeEvent({ title: 'Jazz Night' }));
|
||||
const b = await computeDedupeHash(makeEvent({ title: 'jazz night' }));
|
||||
expect(a).toBe(b);
|
||||
});
|
||||
|
||||
it('is case-insensitive (location)', async () => {
|
||||
const a = await computeDedupeHash(makeEvent({ location: 'Jazzhaus Freiburg' }));
|
||||
const b = await computeDedupeHash(makeEvent({ location: 'jazzhaus freiburg' }));
|
||||
expect(a).toBe(b);
|
||||
});
|
||||
|
||||
it('treats null and empty location the same', async () => {
|
||||
const a = await computeDedupeHash(makeEvent({ location: null }));
|
||||
const b = await computeDedupeHash(makeEvent({ location: '' }));
|
||||
expect(a).toBe(b);
|
||||
});
|
||||
|
||||
it('ignores time-of-day (same calendar date = same hash)', async () => {
|
||||
const a = await computeDedupeHash(makeEvent({ startAt: new Date('2026-05-01T10:00:00Z') }));
|
||||
const b = await computeDedupeHash(makeEvent({ startAt: new Date('2026-05-01T22:00:00Z') }));
|
||||
expect(a).toBe(b);
|
||||
});
|
||||
|
||||
it('trims whitespace from title and location', async () => {
|
||||
const a = await computeDedupeHash(
|
||||
makeEvent({ title: ' Jazz Night ', location: ' Jazzhaus ' })
|
||||
);
|
||||
const b = await computeDedupeHash(makeEvent({ title: 'Jazz Night', location: 'Jazzhaus' }));
|
||||
expect(a).toBe(b);
|
||||
});
|
||||
});
|
||||
389
services/mana-events/src/__tests__/discovery.test.ts
Normal file
389
services/mana-events/src/__tests__/discovery.test.ts
Normal file
|
|
@ -0,0 +1,389 @@
|
|||
/**
|
||||
* Discovery route integration tests.
|
||||
*
|
||||
* Tests CRUD for regions, interests, sources, and the feed endpoint.
|
||||
* Uses the same mock-auth pattern as existing mana-events tests.
|
||||
*/
|
||||
|
||||
import { describe, it, expect, beforeEach, afterAll } from 'bun:test';
|
||||
import { buildTestApp, authedRequest, jsonBody, TEST_USER_ID, OTHER_USER_ID } from './helpers';
|
||||
|
||||
const app = buildTestApp();
|
||||
|
||||
const BASE = 'http://test/api/v1/discovery';
|
||||
|
||||
beforeEach(async () => {
|
||||
await app.wipe();
|
||||
});
|
||||
|
||||
afterAll(async () => {
|
||||
await app.wipe();
|
||||
});
|
||||
|
||||
// ─── Helper ─────────────────────────────────────────────────────────
|
||||
|
||||
async function createRegion(label = 'Freiburg', lat = 47.997, lon = 7.842, user = TEST_USER_ID) {
|
||||
const res = await app.fetch(
|
||||
authedRequest(`${BASE}/regions`, {
|
||||
method: 'POST',
|
||||
body: jsonBody({ label, lat, lon, radiusKm: 25 }),
|
||||
user,
|
||||
})
|
||||
);
|
||||
expect(res.status).toBe(201);
|
||||
const data = await res.json();
|
||||
return data.region;
|
||||
}
|
||||
|
||||
// ─── Regions ────────────────────────────────────────────────────────
|
||||
|
||||
describe('Discovery Regions', () => {
|
||||
it('creates a region', async () => {
|
||||
const region = await createRegion();
|
||||
expect(region.label).toBe('Freiburg');
|
||||
expect(region.lat).toBe(47.997);
|
||||
expect(region.lon).toBe(7.842);
|
||||
expect(region.radiusKm).toBe(25);
|
||||
expect(region.isActive).toBe(true);
|
||||
});
|
||||
|
||||
it('lists only own regions', async () => {
|
||||
await createRegion('Freiburg', 47.997, 7.842, TEST_USER_ID);
|
||||
await createRegion('Basel', 47.559, 7.589, OTHER_USER_ID);
|
||||
|
||||
const res = await app.fetch(authedRequest(`${BASE}/regions`));
|
||||
const { regions } = await res.json();
|
||||
expect(regions).toHaveLength(1);
|
||||
expect(regions[0].label).toBe('Freiburg');
|
||||
});
|
||||
|
||||
it('updates a region', async () => {
|
||||
const region = await createRegion();
|
||||
const res = await app.fetch(
|
||||
authedRequest(`${BASE}/regions/${region.id}`, {
|
||||
method: 'PUT',
|
||||
body: jsonBody({ radiusKm: 50, label: 'Freiburg im Breisgau' }),
|
||||
})
|
||||
);
|
||||
expect(res.status).toBe(200);
|
||||
const { region: updated } = await res.json();
|
||||
expect(updated.radiusKm).toBe(50);
|
||||
expect(updated.label).toBe('Freiburg im Breisgau');
|
||||
});
|
||||
|
||||
it('rejects updating another user region', async () => {
|
||||
const region = await createRegion('Basel', 47.559, 7.589, OTHER_USER_ID);
|
||||
const res = await app.fetch(
|
||||
authedRequest(`${BASE}/regions/${region.id}`, {
|
||||
method: 'PUT',
|
||||
body: jsonBody({ radiusKm: 100 }),
|
||||
})
|
||||
);
|
||||
expect(res.status).toBe(404);
|
||||
});
|
||||
|
||||
it('deletes a region (cascades to sources)', async () => {
|
||||
const region = await createRegion();
|
||||
const delRes = await app.fetch(
|
||||
authedRequest(`${BASE}/regions/${region.id}`, { method: 'DELETE' })
|
||||
);
|
||||
expect(delRes.status).toBe(200);
|
||||
|
||||
const listRes = await app.fetch(authedRequest(`${BASE}/regions`));
|
||||
const { regions } = await listRes.json();
|
||||
expect(regions).toHaveLength(0);
|
||||
});
|
||||
|
||||
it('rejects invalid coordinates', async () => {
|
||||
const res = await app.fetch(
|
||||
authedRequest(`${BASE}/regions`, {
|
||||
method: 'POST',
|
||||
body: jsonBody({ label: 'Bad', lat: 999, lon: 7.0 }),
|
||||
})
|
||||
);
|
||||
expect(res.status).toBe(400);
|
||||
});
|
||||
});
|
||||
|
||||
// ─── Interests ──────────────────────────────────────────────────────
|
||||
|
||||
describe('Discovery Interests', () => {
|
||||
it('creates and lists interests', async () => {
|
||||
const res1 = await app.fetch(
|
||||
authedRequest(`${BASE}/interests`, {
|
||||
method: 'POST',
|
||||
body: jsonBody({ category: 'music' }),
|
||||
})
|
||||
);
|
||||
expect(res1.status).toBe(201);
|
||||
|
||||
const res2 = await app.fetch(
|
||||
authedRequest(`${BASE}/interests`, {
|
||||
method: 'POST',
|
||||
body: jsonBody({ category: 'tech', freetext: 'Rust Meetups', weight: 2.0 }),
|
||||
})
|
||||
);
|
||||
expect(res2.status).toBe(201);
|
||||
|
||||
const listRes = await app.fetch(authedRequest(`${BASE}/interests`));
|
||||
const { interests } = await listRes.json();
|
||||
expect(interests).toHaveLength(2);
|
||||
expect(interests.find((i: { category: string }) => i.category === 'tech').freetext).toBe(
|
||||
'Rust Meetups'
|
||||
);
|
||||
expect(interests.find((i: { category: string }) => i.category === 'tech').weight).toBe(2.0);
|
||||
});
|
||||
|
||||
it('deletes an interest', async () => {
|
||||
const createRes = await app.fetch(
|
||||
authedRequest(`${BASE}/interests`, {
|
||||
method: 'POST',
|
||||
body: jsonBody({ category: 'sport' }),
|
||||
})
|
||||
);
|
||||
const { interest } = await createRes.json();
|
||||
|
||||
const delRes = await app.fetch(
|
||||
authedRequest(`${BASE}/interests/${interest.id}`, { method: 'DELETE' })
|
||||
);
|
||||
expect(delRes.status).toBe(200);
|
||||
|
||||
const listRes = await app.fetch(authedRequest(`${BASE}/interests`));
|
||||
const { interests } = await listRes.json();
|
||||
expect(interests).toHaveLength(0);
|
||||
});
|
||||
});
|
||||
|
||||
// ─── Sources ────────────────────────────────────────────────────────
|
||||
|
||||
describe('Discovery Sources', () => {
|
||||
it('creates a source linked to a region', async () => {
|
||||
const region = await createRegion();
|
||||
const res = await app.fetch(
|
||||
authedRequest(`${BASE}/sources`, {
|
||||
method: 'POST',
|
||||
body: jsonBody({
|
||||
type: 'ical',
|
||||
url: 'https://example.com/events.ics',
|
||||
name: 'Test Venue',
|
||||
regionId: region.id,
|
||||
}),
|
||||
})
|
||||
);
|
||||
expect(res.status).toBe(201);
|
||||
const { source } = await res.json();
|
||||
expect(source.name).toBe('Test Venue');
|
||||
expect(source.type).toBe('ical');
|
||||
expect(source.regionId).toBe(region.id);
|
||||
expect(source.isActive).toBe(true);
|
||||
expect(source.errorCount).toBe(0);
|
||||
});
|
||||
|
||||
it('rejects source with invalid region', async () => {
|
||||
const res = await app.fetch(
|
||||
authedRequest(`${BASE}/sources`, {
|
||||
method: 'POST',
|
||||
body: jsonBody({
|
||||
type: 'ical',
|
||||
url: 'https://example.com/events.ics',
|
||||
name: 'Bad',
|
||||
regionId: '00000000-0000-0000-0000-000000000000',
|
||||
}),
|
||||
})
|
||||
);
|
||||
expect(res.status).toBe(400);
|
||||
});
|
||||
|
||||
it('rejects source for another user region', async () => {
|
||||
const region = await createRegion('Basel', 47.559, 7.589, OTHER_USER_ID);
|
||||
const res = await app.fetch(
|
||||
authedRequest(`${BASE}/sources`, {
|
||||
method: 'POST',
|
||||
body: jsonBody({
|
||||
type: 'ical',
|
||||
url: 'https://example.com/events.ics',
|
||||
name: 'Sneaky',
|
||||
regionId: region.id,
|
||||
}),
|
||||
})
|
||||
);
|
||||
expect(res.status).toBe(400);
|
||||
});
|
||||
|
||||
it('lists only own sources', async () => {
|
||||
const region1 = await createRegion('FR', 47.997, 7.842, TEST_USER_ID);
|
||||
const region2 = await createRegion('BS', 47.559, 7.589, OTHER_USER_ID);
|
||||
|
||||
await app.fetch(
|
||||
authedRequest(`${BASE}/sources`, {
|
||||
method: 'POST',
|
||||
body: jsonBody({
|
||||
type: 'ical',
|
||||
url: 'https://a.com/cal.ics',
|
||||
name: 'A',
|
||||
regionId: region1.id,
|
||||
}),
|
||||
})
|
||||
);
|
||||
await app.fetch(
|
||||
authedRequest(`${BASE}/sources`, {
|
||||
method: 'POST',
|
||||
body: jsonBody({
|
||||
type: 'ical',
|
||||
url: 'https://b.com/cal.ics',
|
||||
name: 'B',
|
||||
regionId: region2.id,
|
||||
}),
|
||||
user: OTHER_USER_ID,
|
||||
})
|
||||
);
|
||||
|
||||
const res = await app.fetch(authedRequest(`${BASE}/sources`));
|
||||
const { sources } = await res.json();
|
||||
expect(sources).toHaveLength(1);
|
||||
expect(sources[0].name).toBe('A');
|
||||
});
|
||||
|
||||
it('deletes a source', async () => {
|
||||
const region = await createRegion();
|
||||
const createRes = await app.fetch(
|
||||
authedRequest(`${BASE}/sources`, {
|
||||
method: 'POST',
|
||||
body: jsonBody({
|
||||
type: 'ical',
|
||||
url: 'https://x.com/cal.ics',
|
||||
name: 'X',
|
||||
regionId: region.id,
|
||||
}),
|
||||
})
|
||||
);
|
||||
const { source } = await createRes.json();
|
||||
|
||||
const delRes = await app.fetch(
|
||||
authedRequest(`${BASE}/sources/${source.id}`, { method: 'DELETE' })
|
||||
);
|
||||
expect(delRes.status).toBe(200);
|
||||
});
|
||||
});
|
||||
|
||||
// ─── Feed ───────────────────────────────────────────────────────────
|
||||
|
||||
describe('Discovery Feed', () => {
|
||||
it('returns empty feed when no sources exist', async () => {
|
||||
await createRegion();
|
||||
const res = await app.fetch(authedRequest(`${BASE}/feed`));
|
||||
expect(res.status).toBe(200);
|
||||
const { events, hasMore } = await res.json();
|
||||
expect(events).toHaveLength(0);
|
||||
expect(hasMore).toBe(false);
|
||||
});
|
||||
|
||||
it('records save action', async () => {
|
||||
const region = await createRegion();
|
||||
|
||||
// Insert a discovered event directly to test the action endpoint
|
||||
const { sql: rawSql } = await import('drizzle-orm');
|
||||
const futureDate = new Date(Date.now() + 7 * 86400000).toISOString();
|
||||
|
||||
// First create a source so we have a source_id for the FK
|
||||
const srcRes = await app.fetch(
|
||||
authedRequest(`${BASE}/sources`, {
|
||||
method: 'POST',
|
||||
body: jsonBody({
|
||||
type: 'ical',
|
||||
url: 'https://test.com/cal.ics',
|
||||
name: 'Test',
|
||||
regionId: region.id,
|
||||
}),
|
||||
})
|
||||
);
|
||||
const { source } = await srcRes.json();
|
||||
|
||||
// Insert event directly into DB
|
||||
await app.db.execute(
|
||||
rawSql`INSERT INTO event_discovery.discovered_events
|
||||
(id, source_id, dedupe_hash, title, start_at, source_url, crawled_at)
|
||||
VALUES (
|
||||
'00000000-0000-0000-0000-000000000001',
|
||||
${source.id}::uuid,
|
||||
'testhash123',
|
||||
'Test Event',
|
||||
${futureDate}::timestamptz,
|
||||
'https://example.com/event',
|
||||
now()
|
||||
)`
|
||||
);
|
||||
|
||||
// Record action
|
||||
const actionRes = await app.fetch(
|
||||
authedRequest(`${BASE}/feed/00000000-0000-0000-0000-000000000001/action`, {
|
||||
method: 'POST',
|
||||
body: jsonBody({ action: 'save' }),
|
||||
})
|
||||
);
|
||||
expect(actionRes.status).toBe(200);
|
||||
|
||||
// Verify the action shows in feed
|
||||
const feedRes = await app.fetch(authedRequest(`${BASE}/feed`));
|
||||
const { events } = await feedRes.json();
|
||||
expect(events).toHaveLength(1);
|
||||
expect(events[0].userAction).toBe('save');
|
||||
});
|
||||
|
||||
it('dismiss action + hideDismissed filters events', async () => {
|
||||
const region = await createRegion();
|
||||
const { sql: rawSql } = await import('drizzle-orm');
|
||||
const futureDate = new Date(Date.now() + 7 * 86400000).toISOString();
|
||||
|
||||
const srcRes = await app.fetch(
|
||||
authedRequest(`${BASE}/sources`, {
|
||||
method: 'POST',
|
||||
body: jsonBody({
|
||||
type: 'ical',
|
||||
url: 'https://test.com/cal.ics',
|
||||
name: 'Test',
|
||||
regionId: region.id,
|
||||
}),
|
||||
})
|
||||
);
|
||||
const { source } = await srcRes.json();
|
||||
|
||||
await app.db.execute(
|
||||
rawSql`INSERT INTO event_discovery.discovered_events
|
||||
(id, source_id, dedupe_hash, title, start_at, source_url, crawled_at)
|
||||
VALUES (
|
||||
'00000000-0000-0000-0000-000000000002',
|
||||
${source.id}::uuid,
|
||||
'hash-dismiss',
|
||||
'Dismissed Event',
|
||||
${futureDate}::timestamptz,
|
||||
'https://example.com/ev2',
|
||||
now()
|
||||
)`
|
||||
);
|
||||
|
||||
// Dismiss
|
||||
await app.fetch(
|
||||
authedRequest(`${BASE}/feed/00000000-0000-0000-0000-000000000002/action`, {
|
||||
method: 'POST',
|
||||
body: jsonBody({ action: 'dismiss' }),
|
||||
})
|
||||
);
|
||||
|
||||
// Without hideDismissed — shows up
|
||||
const res1 = await app.fetch(authedRequest(`${BASE}/feed`));
|
||||
const data1 = await res1.json();
|
||||
expect(data1.events).toHaveLength(1);
|
||||
expect(data1.events[0].userAction).toBe('dismiss');
|
||||
|
||||
// With hideDismissed — filtered out
|
||||
const res2 = await app.fetch(authedRequest(`${BASE}/feed?hideDismissed=true`));
|
||||
const data2 = await res2.json();
|
||||
expect(data2.events).toHaveLength(0);
|
||||
});
|
||||
|
||||
it('rejects unauthenticated requests', async () => {
|
||||
const res = await app.fetch(new Request(`${BASE}/feed`));
|
||||
expect(res.status).toBe(401);
|
||||
});
|
||||
});
|
||||
|
|
@ -40,6 +40,8 @@ const TEST_CONFIG: Config = {
|
|||
rsvpPerTokenPerHour: 5,
|
||||
rsvpMaxPerToken: 20,
|
||||
},
|
||||
manaResearchUrl: 'http://localhost:3068',
|
||||
manaLlmUrl: 'http://localhost:3025',
|
||||
};
|
||||
|
||||
/**
|
||||
|
|
@ -71,6 +73,12 @@ export function buildTestApp(overrides: Partial<Config> = {}): TestApp {
|
|||
async wipe() {
|
||||
// Cascade FK from events_published handles public_rsvps + rate buckets
|
||||
await db.execute(sql`DELETE FROM events.events_published`);
|
||||
// Discovery tables — cascade handles discovered_events + user_actions
|
||||
await db.execute(sql`DELETE FROM event_discovery.discovery_user_actions`);
|
||||
await db.execute(sql`DELETE FROM event_discovery.discovered_events`);
|
||||
await db.execute(sql`DELETE FROM event_discovery.discovery_sources`);
|
||||
await db.execute(sql`DELETE FROM event_discovery.discovery_interests`);
|
||||
await db.execute(sql`DELETE FROM event_discovery.discovery_regions`);
|
||||
},
|
||||
};
|
||||
}
|
||||
|
|
|
|||
188
services/mana-events/src/__tests__/ical-parser.test.ts
Normal file
188
services/mana-events/src/__tests__/ical-parser.test.ts
Normal file
|
|
@ -0,0 +1,188 @@
|
|||
/**
|
||||
* iCal parser unit tests — no DB or network required.
|
||||
* Uses parseIcalText directly with inline iCal strings.
|
||||
*/
|
||||
|
||||
import { describe, it, expect } from 'bun:test';
|
||||
import { parseIcalText } from '../discovery/ical-parser';
|
||||
|
||||
const SOURCE_URL = 'https://example.com/events.ics';
|
||||
const SOURCE_NAME = 'Test Venue';
|
||||
|
||||
/** Helper: generate a date N days from now as iCal UTC string (YYYYMMDDTHHMMSSZ). */
|
||||
function futureIcalDate(daysAhead: number, hour = 19): string {
|
||||
const d = new Date(Date.now() + daysAhead * 86400000);
|
||||
d.setUTCHours(hour, 0, 0, 0);
|
||||
const pad = (n: number) => n.toString().padStart(2, '0');
|
||||
return `${d.getUTCFullYear()}${pad(d.getUTCMonth() + 1)}${pad(d.getUTCDate())}T${pad(d.getUTCHours())}${pad(d.getUTCMinutes())}${pad(d.getUTCSeconds())}Z`;
|
||||
}
|
||||
|
||||
function pastIcalDate(daysAgo: number, hour = 19): string {
|
||||
return futureIcalDate(-daysAgo, hour);
|
||||
}
|
||||
|
||||
function makeIcal(vevents: string): string {
|
||||
return `BEGIN:VCALENDAR
|
||||
VERSION:2.0
|
||||
PRODID:-//Test//Test//EN
|
||||
${vevents}
|
||||
END:VCALENDAR`;
|
||||
}
|
||||
|
||||
// ─── Basic parsing ──────────────────────────────────────────────────
|
||||
|
||||
describe('parseIcalText', () => {
|
||||
it('extracts a single future event', () => {
|
||||
const ical = makeIcal(`BEGIN:VEVENT
|
||||
UID:test-uid-1@example.com
|
||||
DTSTART:${futureIcalDate(3)}
|
||||
DTEND:${futureIcalDate(3, 21)}
|
||||
SUMMARY:Jazz Night
|
||||
DESCRIPTION:Live jazz at the club.
|
||||
LOCATION:Jazzhaus Freiburg
|
||||
URL:https://jazzhaus.de/event/1
|
||||
END:VEVENT`);
|
||||
|
||||
const events = parseIcalText(ical, SOURCE_URL, SOURCE_NAME);
|
||||
expect(events).toHaveLength(1);
|
||||
expect(events[0].title).toBe('Jazz Night');
|
||||
expect(events[0].description).toBe('Live jazz at the club.');
|
||||
expect(events[0].location).toBe('Jazzhaus Freiburg');
|
||||
expect(events[0].externalId).toBe('test-uid-1@example.com');
|
||||
expect(events[0].sourceUrl).toBe('https://jazzhaus.de/event/1');
|
||||
expect(events[0].allDay).toBe(false);
|
||||
});
|
||||
|
||||
it('extracts multiple events', () => {
|
||||
const ical = makeIcal(`BEGIN:VEVENT
|
||||
UID:a@test
|
||||
DTSTART:${futureIcalDate(1)}
|
||||
SUMMARY:Event A
|
||||
END:VEVENT
|
||||
BEGIN:VEVENT
|
||||
UID:b@test
|
||||
DTSTART:${futureIcalDate(2)}
|
||||
SUMMARY:Event B
|
||||
END:VEVENT
|
||||
BEGIN:VEVENT
|
||||
UID:c@test
|
||||
DTSTART:${futureIcalDate(5)}
|
||||
SUMMARY:Event C
|
||||
END:VEVENT`);
|
||||
|
||||
const events = parseIcalText(ical, SOURCE_URL, SOURCE_NAME);
|
||||
expect(events).toHaveLength(3);
|
||||
expect(events.map((e) => e.title).sort()).toEqual(['Event A', 'Event B', 'Event C']);
|
||||
});
|
||||
|
||||
// ─── Filtering ──────────────────────────────────────────────
|
||||
|
||||
it('filters out past events (> 1 day ago)', () => {
|
||||
const ical = makeIcal(`BEGIN:VEVENT
|
||||
UID:past@test
|
||||
DTSTART:${pastIcalDate(5)}
|
||||
DTEND:${pastIcalDate(5, 21)}
|
||||
SUMMARY:Past Event
|
||||
END:VEVENT
|
||||
BEGIN:VEVENT
|
||||
UID:future@test
|
||||
DTSTART:${futureIcalDate(3)}
|
||||
SUMMARY:Future Event
|
||||
END:VEVENT`);
|
||||
|
||||
const events = parseIcalText(ical, SOURCE_URL, SOURCE_NAME);
|
||||
expect(events).toHaveLength(1);
|
||||
expect(events[0].title).toBe('Future Event');
|
||||
});
|
||||
|
||||
it('skips events without a summary', () => {
|
||||
const ical = makeIcal(`BEGIN:VEVENT
|
||||
UID:no-title@test
|
||||
DTSTART:${futureIcalDate(2)}
|
||||
DESCRIPTION:Has no title
|
||||
END:VEVENT`);
|
||||
|
||||
const events = parseIcalText(ical, SOURCE_URL, SOURCE_NAME);
|
||||
expect(events).toHaveLength(0);
|
||||
});
|
||||
|
||||
it('skips VTODO and VFREEBUSY components', () => {
|
||||
const ical = makeIcal(`BEGIN:VTODO
|
||||
UID:todo@test
|
||||
SUMMARY:Not an event
|
||||
END:VTODO
|
||||
BEGIN:VEVENT
|
||||
UID:real@test
|
||||
DTSTART:${futureIcalDate(1)}
|
||||
SUMMARY:Real Event
|
||||
END:VEVENT`);
|
||||
|
||||
const events = parseIcalText(ical, SOURCE_URL, SOURCE_NAME);
|
||||
expect(events).toHaveLength(1);
|
||||
expect(events[0].title).toBe('Real Event');
|
||||
});
|
||||
|
||||
// ─── All-day ────────────────────────────────────────────────
|
||||
|
||||
it('detects all-day events (DATE type without time)', () => {
|
||||
const d = new Date(Date.now() + 3 * 86400000);
|
||||
const pad = (n: number) => n.toString().padStart(2, '0');
|
||||
const dateStr = `${d.getUTCFullYear()}${pad(d.getUTCMonth() + 1)}${pad(d.getUTCDate())}`;
|
||||
|
||||
const ical = makeIcal(`BEGIN:VEVENT
|
||||
UID:allday@test
|
||||
DTSTART;VALUE=DATE:${dateStr}
|
||||
SUMMARY:All Day Festival
|
||||
END:VEVENT`);
|
||||
|
||||
const events = parseIcalText(ical, SOURCE_URL, SOURCE_NAME);
|
||||
expect(events).toHaveLength(1);
|
||||
expect(events[0].allDay).toBe(true);
|
||||
});
|
||||
|
||||
// ─── Fallbacks ──────────────────────────────────────────────
|
||||
|
||||
it('uses sourceUrl when event has no URL property', () => {
|
||||
const ical = makeIcal(`BEGIN:VEVENT
|
||||
UID:no-url@test
|
||||
DTSTART:${futureIcalDate(2)}
|
||||
SUMMARY:No URL Event
|
||||
END:VEVENT`);
|
||||
|
||||
const events = parseIcalText(ical, SOURCE_URL, SOURCE_NAME);
|
||||
expect(events[0].sourceUrl).toBe(SOURCE_URL);
|
||||
});
|
||||
|
||||
it('truncates long descriptions to 2000 chars', () => {
|
||||
const longDesc = 'A'.repeat(3000);
|
||||
const ical = makeIcal(`BEGIN:VEVENT
|
||||
UID:long@test
|
||||
DTSTART:${futureIcalDate(2)}
|
||||
SUMMARY:Long Desc
|
||||
DESCRIPTION:${longDesc}
|
||||
END:VEVENT`);
|
||||
|
||||
const events = parseIcalText(ical, SOURCE_URL, SOURCE_NAME);
|
||||
expect(events[0].description!.length).toBe(2000);
|
||||
});
|
||||
|
||||
it('handles empty calendar gracefully', () => {
|
||||
const ical = makeIcal('');
|
||||
const events = parseIcalText(ical, SOURCE_URL, SOURCE_NAME);
|
||||
expect(events).toHaveLength(0);
|
||||
});
|
||||
|
||||
it('handles optional fields as null', () => {
|
||||
const ical = makeIcal(`BEGIN:VEVENT
|
||||
UID:minimal@test
|
||||
DTSTART:${futureIcalDate(1)}
|
||||
SUMMARY:Minimal Event
|
||||
END:VEVENT`);
|
||||
|
||||
const events = parseIcalText(ical, SOURCE_URL, SOURCE_NAME);
|
||||
expect(events[0].description).toBeNull();
|
||||
expect(events[0].location).toBeNull();
|
||||
// node-ical may synthesize an end date from start when DTEND is missing
|
||||
expect(events[0].startAt).toBeInstanceOf(Date);
|
||||
});
|
||||
});
|
||||
130
services/mana-events/src/__tests__/ical-real-feeds.test.ts
Normal file
130
services/mana-events/src/__tests__/ical-real-feeds.test.ts
Normal file
|
|
@ -0,0 +1,130 @@
|
|||
/**
|
||||
* Real-world iCal feed tests — validates that parseIcalFeed works with
|
||||
* actual public iCal feeds from the internet.
|
||||
*
|
||||
* These tests hit the network and may be slow or flaky. They exist to
|
||||
* catch compatibility issues with real-world iCal quirks that synthetic
|
||||
* test data can't cover.
|
||||
*/
|
||||
|
||||
import { describe, it, expect } from 'bun:test';
|
||||
import { parseIcalFeed } from '../discovery/ical-parser';
|
||||
import { parseIcalText } from '../discovery/ical-parser';
|
||||
|
||||
const TIMEOUT = 20_000;
|
||||
|
||||
describe('Real iCal feeds', () => {
|
||||
it(
|
||||
'parses Mozilla Thunderbird sample calendar',
|
||||
async () => {
|
||||
// Mozilla ships a public demo calendar for Thunderbird / Lightning
|
||||
try {
|
||||
const events = await parseIcalFeed(
|
||||
'https://www.mozilla.org/media/caldata/GermanHolidays.ics',
|
||||
'German Holidays'
|
||||
);
|
||||
// May or may not have future events, but should parse without error
|
||||
expect(Array.isArray(events)).toBe(true);
|
||||
for (const e of events) {
|
||||
expect(e.title).toBeTruthy();
|
||||
expect(e.startAt).toBeInstanceOf(Date);
|
||||
}
|
||||
} catch {
|
||||
// Network error is acceptable for CI — we're testing the parser
|
||||
}
|
||||
},
|
||||
TIMEOUT
|
||||
);
|
||||
|
||||
it('parses a realistic multi-event iCal with timezones', () => {
|
||||
// A realistic iCal string with VTIMEZONE, multiple VEVENTs,
|
||||
// different date formats, and edge cases.
|
||||
const futureYear = new Date().getFullYear() + 1;
|
||||
const ical = `BEGIN:VCALENDAR
|
||||
VERSION:2.0
|
||||
PRODID:-//Jazzhaus//Events//DE
|
||||
X-WR-CALNAME:Jazzhaus Freiburg
|
||||
BEGIN:VTIMEZONE
|
||||
TZID:Europe/Berlin
|
||||
BEGIN:DAYLIGHT
|
||||
TZOFFSETFROM:+0100
|
||||
TZOFFSETTO:+0200
|
||||
TZNAME:CEST
|
||||
DTSTART:19700329T020000
|
||||
RRULE:FREQ=YEARLY;BYDAY=-1SU;BYMONTH=3
|
||||
END:DAYLIGHT
|
||||
BEGIN:STANDARD
|
||||
TZOFFSETFROM:+0200
|
||||
TZOFFSETTO:+0100
|
||||
TZNAME:CET
|
||||
DTSTART:19701025T030000
|
||||
RRULE:FREQ=YEARLY;BYDAY=-1SU;BYMONTH=10
|
||||
END:STANDARD
|
||||
END:VTIMEZONE
|
||||
BEGIN:VEVENT
|
||||
UID:ev1@jazzhaus.de
|
||||
DTSTART;TZID=Europe/Berlin:${futureYear}0515T200000
|
||||
DTEND;TZID=Europe/Berlin:${futureYear}0515T230000
|
||||
SUMMARY:Gregory Porter Live
|
||||
DESCRIPTION:Grammy-winning jazz vocalist performs his greatest hits.\\nSpecial guest: Lizz Wright.
|
||||
LOCATION:Jazzhaus Freiburg\\, Schnewlinstr. 1
|
||||
URL:https://jazzhaus.de/events/gregory-porter
|
||||
CATEGORIES:MUSIC,JAZZ
|
||||
END:VEVENT
|
||||
BEGIN:VEVENT
|
||||
UID:ev2@jazzhaus.de
|
||||
DTSTART;VALUE=DATE:${futureYear}0620
|
||||
DTEND;VALUE=DATE:${futureYear}0622
|
||||
SUMMARY:Freiburg Jazz Festival
|
||||
DESCRIPTION:Three days of jazz across multiple venues.
|
||||
LOCATION:Freiburg Altstadt
|
||||
END:VEVENT
|
||||
BEGIN:VEVENT
|
||||
UID:ev3@jazzhaus.de
|
||||
DTSTART;TZID=Europe/Berlin:${futureYear}0701T190000
|
||||
SUMMARY:Open Mic Night
|
||||
LOCATION:Jazzhaus Freiburg
|
||||
END:VEVENT
|
||||
END:VCALENDAR`;
|
||||
|
||||
const events = parseIcalText(ical, 'https://jazzhaus.de/events.ics', 'Jazzhaus');
|
||||
expect(events).toHaveLength(3);
|
||||
|
||||
// Check the timezone-aware event
|
||||
const porter = events.find((e) => e.title === 'Gregory Porter Live')!;
|
||||
expect(porter).toBeTruthy();
|
||||
expect(porter.description).toContain('Grammy-winning');
|
||||
expect(porter.location).toContain('Jazzhaus Freiburg');
|
||||
expect(porter.sourceUrl).toBe('https://jazzhaus.de/events/gregory-porter');
|
||||
expect(porter.allDay).toBe(false);
|
||||
|
||||
// Check all-day event
|
||||
const festival = events.find((e) => e.title === 'Freiburg Jazz Festival')!;
|
||||
expect(festival).toBeTruthy();
|
||||
expect(festival.allDay).toBe(true);
|
||||
|
||||
// Check minimal event
|
||||
const openMic = events.find((e) => e.title === 'Open Mic Night')!;
|
||||
expect(openMic).toBeTruthy();
|
||||
expect(openMic.description).toBeNull();
|
||||
});
|
||||
|
||||
it('handles escaped characters in iCal text', () => {
|
||||
const futureYear = new Date().getFullYear() + 1;
|
||||
const ical = `BEGIN:VCALENDAR
|
||||
VERSION:2.0
|
||||
BEGIN:VEVENT
|
||||
UID:escaped@test
|
||||
DTSTART:${futureYear}0301T190000Z
|
||||
SUMMARY:Konzert: Rock & Blues\\, feat. "The Band"
|
||||
DESCRIPTION:Ein Abend voller Musik.\\nMit Special Guests.\\n\\nEintritt: 15\\,00 EUR
|
||||
LOCATION:E-Werk\\, Eschholzstr. 77\\, 79106 Freiburg
|
||||
END:VEVENT
|
||||
END:VCALENDAR`;
|
||||
|
||||
const events = parseIcalText(ical, 'https://test.com', 'Test');
|
||||
expect(events).toHaveLength(1);
|
||||
expect(events[0].title).toContain('Rock & Blues');
|
||||
expect(events[0].location).toContain('E-Werk');
|
||||
});
|
||||
});
|
||||
133
services/mana-events/src/__tests__/scorer.test.ts
Normal file
133
services/mana-events/src/__tests__/scorer.test.ts
Normal file
|
|
@ -0,0 +1,133 @@
|
|||
/**
|
||||
* Relevance scorer unit tests.
|
||||
*/
|
||||
|
||||
import { describe, it, expect } from 'bun:test';
|
||||
import { scoreEvent, type ScoredEventInput, type ScoringContext } from '../discovery/scorer';
|
||||
|
||||
function makeEvent(overrides: Partial<ScoredEventInput> = {}): ScoredEventInput {
|
||||
return {
|
||||
title: 'Jazz Night',
|
||||
category: 'music',
|
||||
lat: 47.997,
|
||||
lon: 7.842,
|
||||
startAt: new Date(Date.now() + 3 * 86400000), // 3 days from now
|
||||
...overrides,
|
||||
};
|
||||
}
|
||||
|
||||
function makeContext(overrides: Partial<ScoringContext> = {}): ScoringContext {
|
||||
return {
|
||||
interests: [
|
||||
{ category: 'music', freetext: null, weight: 1.0 },
|
||||
{ category: 'tech', freetext: 'Rust', weight: 2.0 },
|
||||
],
|
||||
regions: [{ lat: 47.997, lon: 7.842, radiusKm: 25 }],
|
||||
...overrides,
|
||||
};
|
||||
}
|
||||
|
||||
describe('scoreEvent', () => {
|
||||
it('returns base score (50) for event with no matches', () => {
|
||||
const score = scoreEvent(
|
||||
makeEvent({ category: 'other', title: 'Nothing', lat: null, lon: null }),
|
||||
makeContext({ interests: [] })
|
||||
);
|
||||
// Base 50 + time proximity bonus (within 7 days) = ~60
|
||||
expect(score).toBeGreaterThanOrEqual(50);
|
||||
expect(score).toBeLessThanOrEqual(65);
|
||||
});
|
||||
|
||||
it('boosts score for category match', () => {
|
||||
const withMatch = scoreEvent(makeEvent({ category: 'music' }), makeContext());
|
||||
const noMatch = scoreEvent(makeEvent({ category: 'other' }), makeContext());
|
||||
expect(withMatch).toBeGreaterThan(noMatch);
|
||||
});
|
||||
|
||||
it('boosts score for freetext match in title', () => {
|
||||
// Use no other matching interests to avoid hitting the 100 cap
|
||||
const ctx = makeContext({ interests: [{ category: 'other', freetext: 'Rust', weight: 1.0 }] });
|
||||
const withMatch = scoreEvent(
|
||||
makeEvent({ title: 'Rust Meetup Freiburg', category: 'sport' }),
|
||||
ctx
|
||||
);
|
||||
const noMatch = scoreEvent(makeEvent({ title: 'Python Meetup', category: 'sport' }), ctx);
|
||||
expect(withMatch).toBeGreaterThan(noMatch);
|
||||
});
|
||||
|
||||
it('applies interest weight', () => {
|
||||
const highWeight = scoreEvent(
|
||||
makeEvent({ title: 'Rust Talk', category: 'tech' }),
|
||||
makeContext({ interests: [{ category: 'tech', freetext: 'Rust', weight: 3.0 }] })
|
||||
);
|
||||
const lowWeight = scoreEvent(
|
||||
makeEvent({ title: 'Rust Talk', category: 'tech' }),
|
||||
makeContext({ interests: [{ category: 'tech', freetext: 'Rust', weight: 0.5 }] })
|
||||
);
|
||||
expect(highWeight).toBeGreaterThan(lowWeight);
|
||||
});
|
||||
|
||||
it('penalizes distant events', () => {
|
||||
const near = scoreEvent(
|
||||
makeEvent({ lat: 47.997, lon: 7.842 }), // same as region center
|
||||
makeContext()
|
||||
);
|
||||
const far = scoreEvent(
|
||||
makeEvent({ lat: 48.5, lon: 8.5 }), // ~60km away
|
||||
makeContext()
|
||||
);
|
||||
expect(near).toBeGreaterThan(far);
|
||||
});
|
||||
|
||||
it('boosts events within 7 days more than 14 days', () => {
|
||||
// Use minimal context to avoid hitting the 100 cap
|
||||
const ctx = makeContext({ interests: [] });
|
||||
const soon = scoreEvent(
|
||||
makeEvent({ startAt: new Date(Date.now() + 3 * 86400000), category: null }),
|
||||
ctx
|
||||
);
|
||||
const later = scoreEvent(
|
||||
makeEvent({ startAt: new Date(Date.now() + 10 * 86400000), category: null }),
|
||||
ctx
|
||||
);
|
||||
const farOut = scoreEvent(
|
||||
makeEvent({ startAt: new Date(Date.now() + 30 * 86400000), category: null }),
|
||||
ctx
|
||||
);
|
||||
expect(soon).toBeGreaterThan(later);
|
||||
expect(later).toBeGreaterThanOrEqual(farOut);
|
||||
});
|
||||
|
||||
it('gives weekend bonus', () => {
|
||||
// Find the next Saturday
|
||||
const now = new Date();
|
||||
const daysUntilSat = (6 - now.getDay() + 7) % 7 || 7;
|
||||
const saturday = new Date(now.getTime() + daysUntilSat * 86400000);
|
||||
const monday = new Date(saturday.getTime() + 2 * 86400000);
|
||||
|
||||
const weekend = scoreEvent(makeEvent({ startAt: saturday }), makeContext());
|
||||
const weekday = scoreEvent(makeEvent({ startAt: monday }), makeContext());
|
||||
expect(weekend).toBeGreaterThanOrEqual(weekday);
|
||||
});
|
||||
|
||||
it('clamps score to 0-100 range', () => {
|
||||
// Lots of matching interests should not exceed 100
|
||||
const manyInterests = Array.from({ length: 10 }, (_, i) => ({
|
||||
category: 'music',
|
||||
freetext: 'jazz',
|
||||
weight: 3.0,
|
||||
}));
|
||||
const score = scoreEvent(
|
||||
makeEvent({ title: 'jazz night', category: 'music' }),
|
||||
makeContext({ interests: manyInterests })
|
||||
);
|
||||
expect(score).toBeLessThanOrEqual(100);
|
||||
expect(score).toBeGreaterThanOrEqual(0);
|
||||
});
|
||||
|
||||
it('handles missing coordinates gracefully', () => {
|
||||
const score = scoreEvent(makeEvent({ lat: null, lon: null }), makeContext());
|
||||
// Should not crash, just skip distance penalty
|
||||
expect(score).toBeGreaterThan(0);
|
||||
});
|
||||
});
|
||||
144
services/mana-events/src/__tests__/website-extractor.test.ts
Normal file
144
services/mana-events/src/__tests__/website-extractor.test.ts
Normal file
|
|
@ -0,0 +1,144 @@
|
|||
/**
|
||||
* Website extractor unit tests — tests the JSON parsing and date handling
|
||||
* without hitting real LLM or mana-research services.
|
||||
*/
|
||||
|
||||
import { describe, it, expect } from 'bun:test';
|
||||
import { parseExtractedEvents } from '../discovery/website-extractor';
|
||||
|
||||
const SOURCE_URL = 'https://jazzhaus.de/programm';
|
||||
const SOURCE_NAME = 'Jazzhaus Freiburg';
|
||||
|
||||
describe('parseExtractedEvents', () => {
|
||||
it('parses a well-formed JSON response', () => {
|
||||
const futureYear = new Date().getFullYear() + 1;
|
||||
const json = JSON.stringify({
|
||||
events: [
|
||||
{
|
||||
title: 'Jazz Night',
|
||||
date: `${futureYear}-05-15`,
|
||||
time: '20:00',
|
||||
location: 'Jazzhaus Freiburg',
|
||||
category: 'music',
|
||||
priceInfo: '15 EUR',
|
||||
},
|
||||
{
|
||||
title: 'Rock Festival',
|
||||
date: `${futureYear}-06-20`,
|
||||
location: 'Stadtpark',
|
||||
category: 'music',
|
||||
},
|
||||
],
|
||||
});
|
||||
|
||||
const events = parseExtractedEvents(json, SOURCE_URL, SOURCE_NAME);
|
||||
expect(events).toHaveLength(2);
|
||||
expect(events[0].title).toBe('Jazz Night');
|
||||
expect(events[0].location).toBe('Jazzhaus Freiburg');
|
||||
expect(events[0].category).toBe('music');
|
||||
expect(events[0].priceInfo).toBe('15 EUR');
|
||||
expect(events[0].allDay).toBe(false); // has time
|
||||
expect(events[1].title).toBe('Rock Festival');
|
||||
expect(events[1].allDay).toBe(true); // no time
|
||||
});
|
||||
|
||||
it('handles markdown-fenced JSON', () => {
|
||||
const futureYear = new Date().getFullYear() + 1;
|
||||
const json = `\`\`\`json
|
||||
{
|
||||
"events": [
|
||||
{"title": "Test", "date": "${futureYear}-03-01", "time": "19:00"}
|
||||
]
|
||||
}
|
||||
\`\`\``;
|
||||
|
||||
const events = parseExtractedEvents(json, SOURCE_URL, SOURCE_NAME);
|
||||
expect(events).toHaveLength(1);
|
||||
expect(events[0].title).toBe('Test');
|
||||
});
|
||||
|
||||
it('parses German date format (DD.MM.YYYY)', () => {
|
||||
const futureYear = new Date().getFullYear() + 1;
|
||||
const json = JSON.stringify({
|
||||
events: [{ title: 'Fest', date: `15.06.${futureYear}`, time: '18:00' }],
|
||||
});
|
||||
|
||||
const events = parseExtractedEvents(json, SOURCE_URL, SOURCE_NAME);
|
||||
expect(events).toHaveLength(1);
|
||||
expect(events[0].startAt.getFullYear()).toBe(futureYear);
|
||||
expect(events[0].startAt.getMonth()).toBe(5); // June = 5
|
||||
});
|
||||
|
||||
it('filters out past events', () => {
|
||||
const json = JSON.stringify({
|
||||
events: [
|
||||
{ title: 'Past Event', date: '2020-01-01', time: '19:00' },
|
||||
{ title: 'Future Event', date: '2030-01-01', time: '19:00' },
|
||||
],
|
||||
});
|
||||
|
||||
const events = parseExtractedEvents(json, SOURCE_URL, SOURCE_NAME);
|
||||
expect(events).toHaveLength(1);
|
||||
expect(events[0].title).toBe('Future Event');
|
||||
});
|
||||
|
||||
it('skips events without title or date', () => {
|
||||
const json = JSON.stringify({
|
||||
events: [
|
||||
{ title: 'Valid', date: '2030-01-01' },
|
||||
{ title: '', date: '2030-01-02' },
|
||||
{ title: 'No Date' },
|
||||
{ date: '2030-01-03' },
|
||||
],
|
||||
});
|
||||
|
||||
const events = parseExtractedEvents(json, SOURCE_URL, SOURCE_NAME);
|
||||
expect(events).toHaveLength(1);
|
||||
expect(events[0].title).toBe('Valid');
|
||||
});
|
||||
|
||||
it('truncates long titles and descriptions', () => {
|
||||
const json = JSON.stringify({
|
||||
events: [
|
||||
{
|
||||
title: 'A'.repeat(300),
|
||||
date: '2030-01-01',
|
||||
description: 'B'.repeat(3000),
|
||||
},
|
||||
],
|
||||
});
|
||||
|
||||
const events = parseExtractedEvents(json, SOURCE_URL, SOURCE_NAME);
|
||||
expect(events[0].title.length).toBe(200);
|
||||
expect(events[0].description!.length).toBe(2000);
|
||||
});
|
||||
|
||||
it('handles empty/invalid JSON gracefully', () => {
|
||||
expect(parseExtractedEvents('', SOURCE_URL, SOURCE_NAME)).toHaveLength(0);
|
||||
expect(parseExtractedEvents('not json', SOURCE_URL, SOURCE_NAME)).toHaveLength(0);
|
||||
expect(parseExtractedEvents('{}', SOURCE_URL, SOURCE_NAME)).toHaveLength(0);
|
||||
expect(parseExtractedEvents('{"events": "not array"}', SOURCE_URL, SOURCE_NAME)).toHaveLength(
|
||||
0
|
||||
);
|
||||
});
|
||||
|
||||
it('handles endDate and endTime', () => {
|
||||
const futureYear = new Date().getFullYear() + 1;
|
||||
const json = JSON.stringify({
|
||||
events: [
|
||||
{
|
||||
title: 'Festival',
|
||||
date: `${futureYear}-07-01`,
|
||||
time: '10:00',
|
||||
endDate: `${futureYear}-07-03`,
|
||||
endTime: '23:00',
|
||||
},
|
||||
],
|
||||
});
|
||||
|
||||
const events = parseExtractedEvents(json, SOURCE_URL, SOURCE_NAME);
|
||||
expect(events).toHaveLength(1);
|
||||
expect(events[0].endAt).not.toBeNull();
|
||||
expect(events[0].endAt!.getDate()).toBe(3);
|
||||
});
|
||||
});
|
||||
|
|
@ -12,6 +12,8 @@ import { jwtAuth } from './middleware/jwt-auth';
|
|||
import { healthRoutes } from './routes/health';
|
||||
import { createEventsRoutes } from './routes/events';
|
||||
import { createRsvpRoutes } from './routes/rsvp';
|
||||
import { createDiscoveryRoutes } from './routes/discovery';
|
||||
import { createDiscoveryFeedRoutes } from './routes/discovery-feed';
|
||||
|
||||
/**
|
||||
* Build the Hono app. The auth middleware is injected so tests can swap
|
||||
|
|
@ -42,5 +44,10 @@ export function createApp(
|
|||
app.use('/api/v1/events/*', authMiddleware);
|
||||
app.route('/api/v1/events', createEventsRoutes(db));
|
||||
|
||||
// Discovery endpoints (all authenticated)
|
||||
app.use('/api/v1/discovery/*', authMiddleware);
|
||||
app.route('/api/v1/discovery', createDiscoveryRoutes(db, config));
|
||||
app.route('/api/v1/discovery', createDiscoveryFeedRoutes(db));
|
||||
|
||||
return app;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -15,6 +15,9 @@ export interface Config {
|
|||
// Hard cap on total RSVPs per token
|
||||
rsvpMaxPerToken: number;
|
||||
};
|
||||
// Phase 2: external service URLs for event discovery
|
||||
manaResearchUrl: string;
|
||||
manaLlmUrl: string;
|
||||
}
|
||||
|
||||
export function loadConfig(): Config {
|
||||
|
|
@ -38,5 +41,7 @@ export function loadConfig(): Config {
|
|||
rsvpPerTokenPerHour: parseInt(process.env.RSVP_RATE_LIMIT || '60', 10),
|
||||
rsvpMaxPerToken: parseInt(process.env.RSVP_MAX_PER_TOKEN || '500', 10),
|
||||
},
|
||||
manaResearchUrl: process.env.MANA_RESEARCH_URL || 'http://localhost:3068',
|
||||
manaLlmUrl: process.env.MANA_LLM_URL || 'http://localhost:3025',
|
||||
};
|
||||
}
|
||||
|
|
|
|||
144
services/mana-events/src/db/schema/discovery.ts
Normal file
144
services/mana-events/src/db/schema/discovery.ts
Normal file
|
|
@ -0,0 +1,144 @@
|
|||
/**
|
||||
* Event Discovery schema — regions, interests, sources, discovered events,
|
||||
* and user actions (save/dismiss).
|
||||
*
|
||||
* All tables live in the `event_discovery` Postgres schema inside `mana_platform`.
|
||||
* Data is server-authoritative (not local-first) — the client caches results
|
||||
* but the server owns the crawl loop and deduplication.
|
||||
*/
|
||||
|
||||
import {
|
||||
pgSchema,
|
||||
uuid,
|
||||
integer,
|
||||
text,
|
||||
real,
|
||||
timestamp,
|
||||
boolean,
|
||||
doublePrecision,
|
||||
uniqueIndex,
|
||||
index,
|
||||
jsonb,
|
||||
} from 'drizzle-orm/pg-core';
|
||||
|
||||
export const discoverySchema = pgSchema('event_discovery');
|
||||
|
||||
// ─── Regions ────────────────────────────────────────────────────────
|
||||
|
||||
/** Geographic areas the user wants to discover events in. */
|
||||
export const discoveryRegions = discoverySchema.table(
|
||||
'discovery_regions',
|
||||
{
|
||||
id: uuid('id').defaultRandom().primaryKey(),
|
||||
userId: text('user_id').notNull(),
|
||||
label: text('label').notNull(),
|
||||
lat: doublePrecision('lat').notNull(),
|
||||
lon: doublePrecision('lon').notNull(),
|
||||
radiusKm: integer('radius_km').default(25).notNull(),
|
||||
isActive: boolean('is_active').default(true).notNull(),
|
||||
createdAt: timestamp('created_at', { withTimezone: true }).defaultNow().notNull(),
|
||||
},
|
||||
(t) => ({
|
||||
userIdx: index('discovery_regions_user_idx').on(t.userId),
|
||||
})
|
||||
);
|
||||
|
||||
// ─── Interests ──────────────────────────────────────────────────────
|
||||
|
||||
/** User interests for relevance scoring. */
|
||||
export const discoveryInterests = discoverySchema.table(
|
||||
'discovery_interests',
|
||||
{
|
||||
id: uuid('id').defaultRandom().primaryKey(),
|
||||
userId: text('user_id').notNull(),
|
||||
category: text('category').notNull(),
|
||||
freetext: text('freetext'),
|
||||
weight: real('weight').default(1.0).notNull(),
|
||||
createdAt: timestamp('created_at', { withTimezone: true }).defaultNow().notNull(),
|
||||
},
|
||||
(t) => ({
|
||||
userIdx: index('discovery_interests_user_idx').on(t.userId),
|
||||
})
|
||||
);
|
||||
|
||||
// ─── Sources ────────────────────────────────────────────────────────
|
||||
|
||||
/** Event sources that are periodically crawled (iCal feeds, websites, …). */
|
||||
export const discoverySources = discoverySchema.table(
|
||||
'discovery_sources',
|
||||
{
|
||||
id: uuid('id').defaultRandom().primaryKey(),
|
||||
userId: text('user_id').notNull(),
|
||||
type: text('type').notNull(), // 'ical' | 'website' | 'api' | 'search_query'
|
||||
url: text('url'),
|
||||
name: text('name').notNull(),
|
||||
regionId: uuid('region_id').references(() => discoveryRegions.id, { onDelete: 'cascade' }),
|
||||
crawlIntervalHours: integer('crawl_interval_hours').default(24).notNull(),
|
||||
lastCrawledAt: timestamp('last_crawled_at', { withTimezone: true }),
|
||||
lastSuccessAt: timestamp('last_success_at', { withTimezone: true }),
|
||||
errorCount: integer('error_count').default(0).notNull(),
|
||||
lastError: text('last_error'),
|
||||
isActive: boolean('is_active').default(true).notNull(),
|
||||
createdAt: timestamp('created_at', { withTimezone: true }).defaultNow().notNull(),
|
||||
updatedAt: timestamp('updated_at', { withTimezone: true }).defaultNow().notNull(),
|
||||
},
|
||||
(t) => ({
|
||||
userActiveIdx: index('discovery_sources_user_active_idx').on(t.userId, t.isActive),
|
||||
})
|
||||
);
|
||||
|
||||
// ─── Discovered Events ──────────────────────────────────────────────
|
||||
|
||||
/** Normalized events found by crawling sources. Deduplicated by hash. */
|
||||
export const discoveredEvents = discoverySchema.table(
|
||||
'discovered_events',
|
||||
{
|
||||
id: uuid('id').defaultRandom().primaryKey(),
|
||||
sourceId: uuid('source_id')
|
||||
.notNull()
|
||||
.references(() => discoverySources.id, { onDelete: 'cascade' }),
|
||||
externalId: text('external_id'),
|
||||
dedupeHash: text('dedupe_hash').notNull(),
|
||||
title: text('title').notNull(),
|
||||
description: text('description'),
|
||||
location: text('location'),
|
||||
lat: doublePrecision('lat'),
|
||||
lon: doublePrecision('lon'),
|
||||
startAt: timestamp('start_at', { withTimezone: true }).notNull(),
|
||||
endAt: timestamp('end_at', { withTimezone: true }),
|
||||
allDay: boolean('all_day').default(false).notNull(),
|
||||
imageUrl: text('image_url'),
|
||||
sourceUrl: text('source_url').notNull(),
|
||||
sourceName: text('source_name'),
|
||||
category: text('category'),
|
||||
priceInfo: text('price_info'),
|
||||
rawExtracted: jsonb('raw_extracted'),
|
||||
crawledAt: timestamp('crawled_at', { withTimezone: true }).defaultNow().notNull(),
|
||||
expiresAt: timestamp('expires_at', { withTimezone: true }),
|
||||
},
|
||||
(t) => ({
|
||||
dedupeIdx: uniqueIndex('discovered_events_dedupe_idx').on(t.dedupeHash),
|
||||
startIdx: index('discovered_events_start_idx').on(t.startAt),
|
||||
sourceIdx: index('discovered_events_source_idx').on(t.sourceId),
|
||||
})
|
||||
);
|
||||
|
||||
// ─── User Actions ───────────────────────────────────────────────────
|
||||
|
||||
/** Tracks user interaction with discovered events (save, dismiss). */
|
||||
export const discoveryUserActions = discoverySchema.table(
|
||||
'discovery_user_actions',
|
||||
{
|
||||
id: uuid('id').defaultRandom().primaryKey(),
|
||||
userId: text('user_id').notNull(),
|
||||
eventId: uuid('event_id')
|
||||
.notNull()
|
||||
.references(() => discoveredEvents.id, { onDelete: 'cascade' }),
|
||||
action: text('action').notNull(), // 'save' | 'dismiss'
|
||||
actedAt: timestamp('acted_at', { withTimezone: true }).defaultNow().notNull(),
|
||||
},
|
||||
(t) => ({
|
||||
userEventUnique: uniqueIndex('discovery_user_actions_user_event_idx').on(t.userId, t.eventId),
|
||||
userIdx: index('discovery_user_actions_user_idx').on(t.userId),
|
||||
})
|
||||
);
|
||||
|
|
@ -1 +1,2 @@
|
|||
export * from './events';
|
||||
export * from './discovery';
|
||||
|
|
|
|||
279
services/mana-events/src/discovery/crawl-scheduler.ts
Normal file
279
services/mana-events/src/discovery/crawl-scheduler.ts
Normal file
|
|
@ -0,0 +1,279 @@
|
|||
/**
|
||||
* Crawl scheduler — periodically processes due discovery sources.
|
||||
*
|
||||
* Runs on a configurable interval (default 15 min). For each source
|
||||
* whose crawl interval has elapsed:
|
||||
* 1. Fetch + parse (iCal for now, website extraction in Phase 2)
|
||||
* 2. Deduplicate via hash
|
||||
* 3. Upsert into discovered_events
|
||||
* 4. Update source status (last_crawled_at, error_count)
|
||||
*
|
||||
* Also cleans up expired events (past events older than 1 day).
|
||||
*/
|
||||
|
||||
import { and, eq, lt, or, isNull, sql } from 'drizzle-orm';
|
||||
import type { Database } from '../db/connection';
|
||||
import { discoverySources, discoveredEvents } from '../db/schema/discovery';
|
||||
import { parseIcalFeed } from './ical-parser';
|
||||
import { extractEventsFromWebsite } from './website-extractor';
|
||||
import { computeDedupeHash } from './deduplicator';
|
||||
import type { NormalizedEvent } from './types';
|
||||
|
||||
const MAX_ERROR_COUNT = 5;
|
||||
|
||||
/** Find all sources due for a crawl. */
|
||||
async function getDueSources(db: Database) {
|
||||
return db
|
||||
.select()
|
||||
.from(discoverySources)
|
||||
.where(
|
||||
and(
|
||||
eq(discoverySources.isActive, true),
|
||||
or(
|
||||
isNull(discoverySources.lastCrawledAt),
|
||||
sql`${discoverySources.lastCrawledAt} < now() - (${discoverySources.crawlIntervalHours} || ' hours')::interval`
|
||||
)
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
/** External service URLs for Phase 2 website extraction. */
|
||||
interface CrawlConfig {
|
||||
manaResearchUrl: string;
|
||||
manaLlmUrl: string;
|
||||
}
|
||||
|
||||
/** Crawl a single source and return normalized events. */
|
||||
async function crawlSource(
|
||||
source: typeof discoverySources.$inferSelect,
|
||||
config?: CrawlConfig
|
||||
): Promise<{ events: NormalizedEvent[]; error?: string }> {
|
||||
try {
|
||||
switch (source.type) {
|
||||
case 'ical': {
|
||||
if (!source.url) return { events: [], error: 'No URL configured' };
|
||||
const events = await parseIcalFeed(source.url, source.name);
|
||||
return { events };
|
||||
}
|
||||
case 'website': {
|
||||
if (!source.url) return { events: [], error: 'No URL configured' };
|
||||
if (!config)
|
||||
return { events: [], error: 'Missing research/LLM config for website extraction' };
|
||||
const events = await extractEventsFromWebsite(
|
||||
source.url,
|
||||
source.name,
|
||||
config.manaResearchUrl,
|
||||
config.manaLlmUrl
|
||||
);
|
||||
return { events };
|
||||
}
|
||||
default:
|
||||
return { events: [], error: `Unsupported source type: ${source.type}` };
|
||||
}
|
||||
} catch (err) {
|
||||
const message = err instanceof Error ? err.message : 'Unknown error';
|
||||
return { events: [], error: message };
|
||||
}
|
||||
}
|
||||
|
||||
/** Upsert normalized events into discovered_events. */
|
||||
async function upsertEvents(
|
||||
db: Database,
|
||||
sourceId: string,
|
||||
sourceName: string,
|
||||
events: NormalizedEvent[]
|
||||
): Promise<number> {
|
||||
let upserted = 0;
|
||||
for (const event of events) {
|
||||
const dedupeHash = await computeDedupeHash(event);
|
||||
const expiresAt = new Date(event.startAt.getTime() + 24 * 60 * 60 * 1000);
|
||||
|
||||
try {
|
||||
await db
|
||||
.insert(discoveredEvents)
|
||||
.values({
|
||||
sourceId,
|
||||
externalId: event.externalId ?? null,
|
||||
dedupeHash,
|
||||
title: event.title,
|
||||
description: event.description ?? null,
|
||||
location: event.location ?? null,
|
||||
lat: event.lat ?? null,
|
||||
lon: event.lon ?? null,
|
||||
startAt: event.startAt,
|
||||
endAt: event.endAt ?? null,
|
||||
allDay: event.allDay ?? false,
|
||||
imageUrl: event.imageUrl ?? null,
|
||||
sourceUrl: event.sourceUrl,
|
||||
sourceName,
|
||||
category: event.category ?? null,
|
||||
priceInfo: event.priceInfo ?? null,
|
||||
expiresAt,
|
||||
})
|
||||
.onConflictDoUpdate({
|
||||
target: discoveredEvents.dedupeHash,
|
||||
set: {
|
||||
title: event.title,
|
||||
description: event.description ?? null,
|
||||
location: event.location ?? null,
|
||||
startAt: event.startAt,
|
||||
endAt: event.endAt ?? null,
|
||||
sourceUrl: event.sourceUrl,
|
||||
category: event.category ?? null,
|
||||
priceInfo: event.priceInfo ?? null,
|
||||
crawledAt: new Date(),
|
||||
},
|
||||
});
|
||||
upserted++;
|
||||
} catch (err) {
|
||||
// Log but don't fail the whole batch for one bad event
|
||||
console.error(`[discovery] failed to upsert event "${event.title}":`, err);
|
||||
}
|
||||
}
|
||||
return upserted;
|
||||
}
|
||||
|
||||
/** Process a single source: crawl, dedup, upsert, update status. */
|
||||
async function processSource(
|
||||
db: Database,
|
||||
source: typeof discoverySources.$inferSelect,
|
||||
config?: CrawlConfig
|
||||
): Promise<void> {
|
||||
const { events, error } = await crawlSource(source, config);
|
||||
const now = new Date();
|
||||
|
||||
if (error) {
|
||||
const newErrorCount = source.errorCount + 1;
|
||||
await db
|
||||
.update(discoverySources)
|
||||
.set({
|
||||
lastCrawledAt: now,
|
||||
errorCount: newErrorCount,
|
||||
lastError: error,
|
||||
isActive: newErrorCount < MAX_ERROR_COUNT,
|
||||
updatedAt: now,
|
||||
})
|
||||
.where(eq(discoverySources.id, source.id));
|
||||
|
||||
if (newErrorCount >= MAX_ERROR_COUNT) {
|
||||
console.warn(
|
||||
`[discovery] source "${source.name}" (${source.id}) deactivated after ${MAX_ERROR_COUNT} errors`
|
||||
);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
const upserted = await upsertEvents(db, source.id, source.name, events);
|
||||
|
||||
await db
|
||||
.update(discoverySources)
|
||||
.set({
|
||||
lastCrawledAt: now,
|
||||
lastSuccessAt: now,
|
||||
errorCount: 0,
|
||||
lastError: null,
|
||||
updatedAt: now,
|
||||
})
|
||||
.where(eq(discoverySources.id, source.id));
|
||||
|
||||
if (upserted > 0) {
|
||||
console.log(`[discovery] crawled "${source.name}" — ${upserted} events upserted`);
|
||||
}
|
||||
}
|
||||
|
||||
/** Delete discovered events whose expiry has passed (past events). */
|
||||
async function cleanupExpiredEvents(db: Database): Promise<number> {
|
||||
const result = await db
|
||||
.delete(discoveredEvents)
|
||||
.where(lt(discoveredEvents.expiresAt, new Date()))
|
||||
.returning({ id: discoveredEvents.id });
|
||||
return result.length;
|
||||
}
|
||||
|
||||
/** Run one tick of the crawl scheduler. */
|
||||
export async function runCrawlTick(db: Database, config?: CrawlConfig): Promise<void> {
|
||||
try {
|
||||
const due = await getDueSources(db);
|
||||
for (const source of due) {
|
||||
await processSource(db, source, config);
|
||||
}
|
||||
|
||||
const expired = await cleanupExpiredEvents(db);
|
||||
if (expired > 0) {
|
||||
console.log(`[discovery] cleaned up ${expired} expired events`);
|
||||
}
|
||||
} catch (err) {
|
||||
console.error('[discovery] crawl tick failed:', err);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Start the periodic crawl scheduler. Returns a stop function.
|
||||
* Default interval: 15 minutes.
|
||||
*/
|
||||
export function startCrawlScheduler(
|
||||
db: Database,
|
||||
config?: CrawlConfig,
|
||||
intervalMs = 15 * 60 * 1000
|
||||
): () => void {
|
||||
const tick = () => runCrawlTick(db, config);
|
||||
|
||||
// First run shortly after boot
|
||||
const bootTimer = setTimeout(tick, 10_000);
|
||||
const intervalTimer = setInterval(tick, intervalMs);
|
||||
|
||||
return () => {
|
||||
clearTimeout(bootTimer);
|
||||
clearInterval(intervalTimer);
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Crawl a single source immediately (triggered by user action).
|
||||
* Returns the number of events upserted.
|
||||
*/
|
||||
export async function crawlSourceNow(
|
||||
db: Database,
|
||||
sourceId: string,
|
||||
config?: CrawlConfig
|
||||
): Promise<{ upserted: number; error?: string }> {
|
||||
const sources = await db
|
||||
.select()
|
||||
.from(discoverySources)
|
||||
.where(eq(discoverySources.id, sourceId))
|
||||
.limit(1);
|
||||
|
||||
if (!sources[0]) return { upserted: 0, error: 'Source not found' };
|
||||
|
||||
const source = sources[0];
|
||||
const { events, error } = await crawlSource(source, config);
|
||||
const now = new Date();
|
||||
|
||||
if (error) {
|
||||
await db
|
||||
.update(discoverySources)
|
||||
.set({
|
||||
lastCrawledAt: now,
|
||||
errorCount: source.errorCount + 1,
|
||||
lastError: error,
|
||||
updatedAt: now,
|
||||
})
|
||||
.where(eq(discoverySources.id, sourceId));
|
||||
return { upserted: 0, error };
|
||||
}
|
||||
|
||||
const upserted = await upsertEvents(db, sourceId, source.name, events);
|
||||
|
||||
await db
|
||||
.update(discoverySources)
|
||||
.set({
|
||||
lastCrawledAt: now,
|
||||
lastSuccessAt: now,
|
||||
errorCount: 0,
|
||||
lastError: null,
|
||||
updatedAt: now,
|
||||
})
|
||||
.where(eq(discoverySources.id, sourceId));
|
||||
|
||||
return { upserted };
|
||||
}
|
||||
27
services/mana-events/src/discovery/deduplicator.ts
Normal file
27
services/mana-events/src/discovery/deduplicator.ts
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
/**
|
||||
* Deduplication for discovered events.
|
||||
*
|
||||
* Hash is based on normalized title + date + location so the same event
|
||||
* from different sources (or re-crawls of the same source) collapses
|
||||
* into a single row.
|
||||
*/
|
||||
|
||||
import type { NormalizedEvent } from './types';
|
||||
|
||||
/**
|
||||
* Compute a SHA-256 hex hash for deduplication.
|
||||
* Key components: lowercased title + ISO date (no time) + lowercased location.
|
||||
*/
|
||||
export async function computeDedupeHash(event: NormalizedEvent): Promise<string> {
|
||||
const title = event.title.toLowerCase().trim();
|
||||
const date = event.startAt.toISOString().slice(0, 10); // YYYY-MM-DD
|
||||
const location = (event.location ?? '').toLowerCase().trim();
|
||||
|
||||
const input = `${title}|${date}|${location}`;
|
||||
const encoded = new TextEncoder().encode(input);
|
||||
const hashBuffer = await crypto.subtle.digest('SHA-256', encoded);
|
||||
const hashArray = new Uint8Array(hashBuffer);
|
||||
return Array.from(hashArray)
|
||||
.map((b) => b.toString(16).padStart(2, '0'))
|
||||
.join('');
|
||||
}
|
||||
96
services/mana-events/src/discovery/ical-parser.ts
Normal file
96
services/mana-events/src/discovery/ical-parser.ts
Normal file
|
|
@ -0,0 +1,96 @@
|
|||
/**
|
||||
* iCal (.ics) feed parser — fetches a remote iCal URL and extracts
|
||||
* VEVENT entries as NormalizedEvents.
|
||||
*
|
||||
* Uses node-ical for robust parsing of the many iCal quirks in the wild
|
||||
* (timezone aliases, RRULE expansion, non-standard properties).
|
||||
*/
|
||||
|
||||
import ical, { type VEvent } from 'node-ical';
|
||||
import type { NormalizedEvent } from './types';
|
||||
|
||||
const FETCH_TIMEOUT_MS = 15_000;
|
||||
|
||||
/**
|
||||
* Fetch and parse an iCal feed URL. Returns future events only.
|
||||
* Handles VTIMEZONE, DTSTART/DTEND with and without timezone, RRULE.
|
||||
*/
|
||||
export async function parseIcalFeed(url: string, sourceName: string): Promise<NormalizedEvent[]> {
|
||||
const controller = new AbortController();
|
||||
const timeout = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS);
|
||||
|
||||
try {
|
||||
const res = await fetch(url, {
|
||||
signal: controller.signal,
|
||||
headers: { 'User-Agent': 'Mana-Events-Discovery/1.0' },
|
||||
});
|
||||
if (!res.ok) {
|
||||
throw new Error(`HTTP ${res.status} fetching ${url}`);
|
||||
}
|
||||
const text = await res.text();
|
||||
return parseIcalText(text, url, sourceName);
|
||||
} finally {
|
||||
clearTimeout(timeout);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse raw iCal text into NormalizedEvents.
|
||||
* Exported for testing without network.
|
||||
*/
|
||||
export function parseIcalText(
|
||||
icalText: string,
|
||||
sourceUrl: string,
|
||||
sourceName: string
|
||||
): NormalizedEvent[] {
|
||||
const parsed = ical.sync.parseICS(icalText);
|
||||
const now = new Date();
|
||||
const events: NormalizedEvent[] = [];
|
||||
|
||||
for (const component of Object.values(parsed)) {
|
||||
if (!component || component.type !== 'VEVENT') continue;
|
||||
const vevent = component as VEvent;
|
||||
|
||||
const startDate = vevent.start ? new Date(vevent.start as unknown as string) : null;
|
||||
if (!startDate || isNaN(startDate.getTime())) continue;
|
||||
|
||||
// Skip past events (allow 1 day grace for ongoing events)
|
||||
const endDate = vevent.end ? new Date(vevent.end as unknown as string) : null;
|
||||
const cutoff = endDate ?? startDate;
|
||||
if (cutoff.getTime() < now.getTime() - 24 * 60 * 60 * 1000) continue;
|
||||
|
||||
const summary = typeof vevent.summary === 'string' ? vevent.summary.trim() : '';
|
||||
if (!summary) continue;
|
||||
|
||||
const description = typeof vevent.description === 'string' ? vevent.description.trim() : null;
|
||||
const location = typeof vevent.location === 'string' ? vevent.location.trim() : null;
|
||||
|
||||
// Detect all-day: either DTSTART is DATE (no time), or duration spans full days
|
||||
const allDay =
|
||||
vevent.datetype === 'date' ||
|
||||
(vevent.start &&
|
||||
typeof vevent.start === 'object' &&
|
||||
'dateOnly' in vevent.start &&
|
||||
(vevent.start as { dateOnly?: boolean }).dateOnly === true);
|
||||
|
||||
// Use the VEVENT UID as external ID for dedup stability across re-crawls
|
||||
const uid = typeof vevent.uid === 'string' ? vevent.uid : null;
|
||||
|
||||
// Extract URL if present
|
||||
const eventUrl = typeof vevent.url === 'string' ? vevent.url : sourceUrl;
|
||||
|
||||
events.push({
|
||||
title: summary,
|
||||
description: description ? description.slice(0, 2000) : null,
|
||||
location,
|
||||
startAt: startDate,
|
||||
endAt: endDate,
|
||||
allDay: !!allDay,
|
||||
sourceUrl: eventUrl,
|
||||
externalId: uid,
|
||||
category: null, // iCal doesn't have a standard category we can rely on
|
||||
});
|
||||
}
|
||||
|
||||
return events;
|
||||
}
|
||||
80
services/mana-events/src/discovery/scorer.ts
Normal file
80
services/mana-events/src/discovery/scorer.ts
Normal file
|
|
@ -0,0 +1,80 @@
|
|||
/**
|
||||
* Relevance Scorer — ranks discovered events for the user's feed.
|
||||
*
|
||||
* Scoring dimensions:
|
||||
* - Category match with user interests (+20 per match, weighted)
|
||||
* - Freetext match in title (+15 per interest freetext match)
|
||||
* - Distance to nearest region center (-1 per km beyond 5km)
|
||||
* - Time proximity (+10 if within 7 days, +5 if within 14 days)
|
||||
* - Weekend bonus (+5 for Sat/Sun events)
|
||||
*
|
||||
* Score range: 0–100, clamped.
|
||||
*/
|
||||
|
||||
export interface ScoredEventInput {
|
||||
category: string | null;
|
||||
title: string;
|
||||
lat: number | null;
|
||||
lon: number | null;
|
||||
startAt: Date;
|
||||
}
|
||||
|
||||
export interface ScoringContext {
|
||||
interests: Array<{ category: string; freetext: string | null; weight: number }>;
|
||||
regions: Array<{ lat: number; lon: number; radiusKm: number }>;
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute a relevance score for a discovered event.
|
||||
* Returns 0–100, higher = more relevant.
|
||||
*/
|
||||
export function scoreEvent(event: ScoredEventInput, ctx: ScoringContext): number {
|
||||
let score = 50; // Base score
|
||||
|
||||
// ── Category match ──────────────────────────────────────────
|
||||
for (const interest of ctx.interests) {
|
||||
if (event.category && event.category === interest.category) {
|
||||
score += 20 * interest.weight;
|
||||
}
|
||||
if (interest.freetext && event.title.toLowerCase().includes(interest.freetext.toLowerCase())) {
|
||||
score += 15 * interest.weight;
|
||||
}
|
||||
}
|
||||
|
||||
// ── Distance ────────────────────────────────────────────────
|
||||
if (event.lat != null && event.lon != null && ctx.regions.length > 0) {
|
||||
const nearest = Math.min(
|
||||
...ctx.regions.map((r) => haversineKm(event.lat!, event.lon!, r.lat, r.lon))
|
||||
);
|
||||
// Penalty: -1 per km beyond 5km
|
||||
score -= Math.max(0, nearest - 5);
|
||||
}
|
||||
|
||||
// ── Time proximity ──────────────────────────────────────────
|
||||
const daysUntil = (event.startAt.getTime() - Date.now()) / 86_400_000;
|
||||
if (daysUntil >= 0 && daysUntil <= 7) score += 10;
|
||||
else if (daysUntil > 7 && daysUntil <= 14) score += 5;
|
||||
|
||||
// ── Weekend bonus ───────────────────────────────────────────
|
||||
const dow = event.startAt.getDay();
|
||||
if (dow === 0 || dow === 6) score += 5;
|
||||
|
||||
return Math.max(0, Math.min(100, Math.round(score)));
|
||||
}
|
||||
|
||||
/**
|
||||
* Haversine distance in km between two lat/lon points.
|
||||
*/
|
||||
function haversineKm(lat1: number, lon1: number, lat2: number, lon2: number): number {
|
||||
const R = 6371;
|
||||
const dLat = toRad(lat2 - lat1);
|
||||
const dLon = toRad(lon2 - lon1);
|
||||
const a =
|
||||
Math.sin(dLat / 2) ** 2 +
|
||||
Math.cos(toRad(lat1)) * Math.cos(toRad(lat2)) * Math.sin(dLon / 2) ** 2;
|
||||
return R * 2 * Math.atan2(Math.sqrt(a), Math.sqrt(1 - a));
|
||||
}
|
||||
|
||||
function toRad(deg: number): number {
|
||||
return (deg * Math.PI) / 180;
|
||||
}
|
||||
187
services/mana-events/src/discovery/source-discoverer.ts
Normal file
187
services/mana-events/src/discovery/source-discoverer.ts
Normal file
|
|
@ -0,0 +1,187 @@
|
|||
/**
|
||||
* Source Discoverer — automatically finds event sources for a region.
|
||||
*
|
||||
* Given a region (e.g. "Freiburg"), searches the web via mana-research
|
||||
* for iCal feeds and venue websites, then inserts them as suggested
|
||||
* sources the user can activate or reject.
|
||||
*
|
||||
* Pipeline:
|
||||
* 1. Build search queries from region label
|
||||
* 2. Search via mana-research POST /api/v1/search
|
||||
* 3. Classify results: .ics URLs → 'ical', venue/event pages → 'website'
|
||||
* 4. Insert as discovery_sources with is_active=false (suggested)
|
||||
*/
|
||||
|
||||
import { eq, and } from 'drizzle-orm';
|
||||
import type { Database } from '../db/connection';
|
||||
import { discoverySources, discoveryRegions } from '../db/schema/discovery';
|
||||
|
||||
const SEARCH_TIMEOUT_MS = 15_000;
|
||||
|
||||
/** Patterns that indicate an iCal feed URL. */
|
||||
const ICAL_PATTERNS = [/\.ics$/i, /\.ical$/i, /webcal:\/\//i, /format=ical/i, /export.*ical/i];
|
||||
|
||||
/** Patterns that indicate an event/venue page worth crawling. */
|
||||
const EVENT_PAGE_PATTERNS = [
|
||||
/veranstaltung/i,
|
||||
/kalender/i,
|
||||
/programm/i,
|
||||
/events?\b/i,
|
||||
/termine/i,
|
||||
/konzert/i,
|
||||
/festival/i,
|
||||
/theater/i,
|
||||
/what.?s.?on/i,
|
||||
/schedule/i,
|
||||
/agenda/i,
|
||||
];
|
||||
|
||||
/** Search queries to discover event sources for a region. */
|
||||
function buildSearchQueries(regionLabel: string): string[] {
|
||||
return [
|
||||
`${regionLabel} Veranstaltungskalender`,
|
||||
`${regionLabel} Events Termine`,
|
||||
`${regionLabel} Kulturzentrum Programm`,
|
||||
`${regionLabel} Konzerte Theater Termine`,
|
||||
`${regionLabel} Vereine Veranstaltungen`,
|
||||
];
|
||||
}
|
||||
|
||||
interface SearchHit {
|
||||
url: string;
|
||||
title: string;
|
||||
snippet?: string;
|
||||
}
|
||||
|
||||
interface SearchResponse {
|
||||
success: boolean;
|
||||
data?: {
|
||||
results: SearchHit[];
|
||||
};
|
||||
}
|
||||
|
||||
/** Classify a URL as ical, website, or null (not relevant). */
|
||||
function classifyUrl(url: string, title: string, snippet?: string): 'ical' | 'website' | null {
|
||||
// Check for iCal feed
|
||||
if (ICAL_PATTERNS.some((p) => p.test(url))) return 'ical';
|
||||
|
||||
// Check for event/venue page
|
||||
const text = `${url} ${title} ${snippet ?? ''}`;
|
||||
if (EVENT_PAGE_PATTERNS.some((p) => p.test(text))) return 'website';
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
/** Extract a human-readable name from a URL + title. */
|
||||
function extractSourceName(url: string, title: string): string {
|
||||
// Prefer the page title, trimmed to something reasonable
|
||||
if (title) {
|
||||
// Strip common suffixes
|
||||
const cleaned = title
|
||||
.replace(/\s*[-|–—]\s*(Startseite|Home|Events?|Veranstaltungen|Termine|Programm).*$/i, '')
|
||||
.trim();
|
||||
if (cleaned.length > 3 && cleaned.length < 100) return cleaned;
|
||||
}
|
||||
// Fallback: hostname
|
||||
try {
|
||||
return new URL(url).hostname.replace(/^www\./, '');
|
||||
} catch {
|
||||
return url.slice(0, 80);
|
||||
}
|
||||
}
|
||||
|
||||
export interface DiscoverResult {
|
||||
suggestedCount: number;
|
||||
queries: number;
|
||||
searchResults: number;
|
||||
}
|
||||
|
||||
/**
|
||||
* Discover event sources for a region by searching the web.
|
||||
*
|
||||
* Inserts found sources with is_active=false so the user can review
|
||||
* and activate them. Skips URLs that already exist as sources.
|
||||
*/
|
||||
export async function discoverSourcesForRegion(
|
||||
db: Database,
|
||||
regionId: string,
|
||||
userId: string,
|
||||
manaResearchUrl: string
|
||||
): Promise<DiscoverResult> {
|
||||
// Get the region
|
||||
const [region] = await db
|
||||
.select()
|
||||
.from(discoveryRegions)
|
||||
.where(and(eq(discoveryRegions.id, regionId), eq(discoveryRegions.userId, userId)))
|
||||
.limit(1);
|
||||
|
||||
if (!region) throw new Error('Region not found');
|
||||
|
||||
// Get existing source URLs to avoid duplicates
|
||||
const existingSources = await db
|
||||
.select({ url: discoverySources.url })
|
||||
.from(discoverySources)
|
||||
.where(eq(discoverySources.userId, userId));
|
||||
const existingUrls = new Set(existingSources.map((s) => s.url).filter(Boolean));
|
||||
|
||||
const queries = buildSearchQueries(region.label);
|
||||
let totalResults = 0;
|
||||
let suggestedCount = 0;
|
||||
|
||||
// Run searches in parallel (but limit to avoid hammering the service)
|
||||
const searchResults = await Promise.all(
|
||||
queries.map((query) => searchWeb(manaResearchUrl, query))
|
||||
);
|
||||
|
||||
for (const result of searchResults) {
|
||||
if (!result?.data?.results) continue;
|
||||
|
||||
for (const hit of result.data.results) {
|
||||
totalResults++;
|
||||
if (existingUrls.has(hit.url)) continue;
|
||||
|
||||
const type = classifyUrl(hit.url, hit.title, hit.snippet);
|
||||
if (!type) continue;
|
||||
|
||||
const name = extractSourceName(hit.url, hit.title);
|
||||
|
||||
try {
|
||||
await db.insert(discoverySources).values({
|
||||
userId,
|
||||
type,
|
||||
url: hit.url,
|
||||
name,
|
||||
regionId,
|
||||
isActive: false, // suggested — user must activate
|
||||
crawlIntervalHours: type === 'ical' ? 24 : 48,
|
||||
});
|
||||
existingUrls.add(hit.url);
|
||||
suggestedCount++;
|
||||
} catch {
|
||||
// Ignore dupes from parallel queries
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return { suggestedCount, queries: queries.length, searchResults: totalResults };
|
||||
}
|
||||
|
||||
/** Search the web via mana-research. Gracefully returns null on failure. */
|
||||
async function searchWeb(manaResearchUrl: string, query: string): Promise<SearchResponse | null> {
|
||||
try {
|
||||
const res = await fetch(`${manaResearchUrl}/api/v1/search`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({ query }),
|
||||
signal: AbortSignal.timeout(SEARCH_TIMEOUT_MS),
|
||||
});
|
||||
if (!res.ok) {
|
||||
console.warn(`[source-discoverer] search failed ${res.status}: ${query}`);
|
||||
return null;
|
||||
}
|
||||
return (await res.json()) as SearchResponse;
|
||||
} catch (err) {
|
||||
console.warn(`[source-discoverer] search error for "${query}":`, err);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
50
services/mana-events/src/discovery/types.ts
Normal file
50
services/mana-events/src/discovery/types.ts
Normal file
|
|
@ -0,0 +1,50 @@
|
|||
/**
|
||||
* Discovery domain types — shared across parser, scheduler, and routes.
|
||||
*/
|
||||
|
||||
/** A normalized event extracted from any source type. */
|
||||
export interface NormalizedEvent {
|
||||
title: string;
|
||||
description?: string | null;
|
||||
location?: string | null;
|
||||
lat?: number | null;
|
||||
lon?: number | null;
|
||||
startAt: Date;
|
||||
endAt?: Date | null;
|
||||
allDay?: boolean;
|
||||
imageUrl?: string | null;
|
||||
sourceUrl: string;
|
||||
category?: string | null;
|
||||
priceInfo?: string | null;
|
||||
externalId?: string | null;
|
||||
}
|
||||
|
||||
/** Result of crawling a single source. */
|
||||
export interface CrawlResult {
|
||||
sourceId: string;
|
||||
sourceName: string;
|
||||
events: NormalizedEvent[];
|
||||
error?: string;
|
||||
}
|
||||
|
||||
/** Source types supported by the crawler. */
|
||||
export type SourceType = 'ical' | 'website' | 'api' | 'search_query';
|
||||
|
||||
/** Event categories for filtering and scoring. */
|
||||
export const EVENT_CATEGORIES = [
|
||||
'music',
|
||||
'theater',
|
||||
'art',
|
||||
'tech',
|
||||
'sport',
|
||||
'food',
|
||||
'family',
|
||||
'nature',
|
||||
'education',
|
||||
'community',
|
||||
'nightlife',
|
||||
'market',
|
||||
'other',
|
||||
] as const;
|
||||
|
||||
export type EventCategory = (typeof EVENT_CATEGORIES)[number];
|
||||
236
services/mana-events/src/discovery/website-extractor.ts
Normal file
236
services/mana-events/src/discovery/website-extractor.ts
Normal file
|
|
@ -0,0 +1,236 @@
|
|||
/**
|
||||
* Website Extractor — LLM-based event extraction from unstructured web pages.
|
||||
*
|
||||
* Pipeline:
|
||||
* 1. Crawl the page via mana-research POST /api/v1/extract
|
||||
* 2. Feed the extracted text to mana-llm with a structured output prompt
|
||||
* 3. Parse the LLM response as NormalizedEvent[]
|
||||
*
|
||||
* Uses cheap/fast models (gemma3:4b or haiku) to keep costs low.
|
||||
* Falls back gracefully on any failure — one bad page doesn't crash the batch.
|
||||
*/
|
||||
|
||||
import type { NormalizedEvent } from './types';
|
||||
|
||||
const EXTRACT_TIMEOUT_MS = 20_000;
|
||||
const LLM_TIMEOUT_MS = 30_000;
|
||||
const MAX_CONTENT_CHARS = 15_000; // Trim long pages to stay within context window
|
||||
|
||||
interface ExtractResponse {
|
||||
success: boolean;
|
||||
data?: {
|
||||
content: {
|
||||
title?: string;
|
||||
text?: string;
|
||||
markdown?: string;
|
||||
html?: string;
|
||||
};
|
||||
};
|
||||
}
|
||||
|
||||
interface ChatCompletionResponse {
|
||||
choices: Array<{
|
||||
message: {
|
||||
content: string;
|
||||
};
|
||||
}>;
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract events from a website URL.
|
||||
*
|
||||
* 1. Fetches + renders the page via mana-research (Firecrawl/Jina/Readability)
|
||||
* 2. Sends the text to mana-llm with a structured extraction prompt
|
||||
* 3. Parses JSON output into NormalizedEvent[]
|
||||
*/
|
||||
export async function extractEventsFromWebsite(
|
||||
url: string,
|
||||
sourceName: string,
|
||||
manaResearchUrl: string,
|
||||
manaLlmUrl: string
|
||||
): Promise<NormalizedEvent[]> {
|
||||
// Step 1: Extract page content
|
||||
const content = await fetchPageContent(url, manaResearchUrl);
|
||||
if (!content) return [];
|
||||
|
||||
// Step 2: LLM extraction
|
||||
const events = await llmExtractEvents(content, url, sourceName, manaLlmUrl);
|
||||
return events;
|
||||
}
|
||||
|
||||
/** Fetch and extract text content from a URL via mana-research. */
|
||||
async function fetchPageContent(url: string, manaResearchUrl: string): Promise<string | null> {
|
||||
try {
|
||||
const res = await fetch(`${manaResearchUrl}/api/v1/extract`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({ url }),
|
||||
signal: AbortSignal.timeout(EXTRACT_TIMEOUT_MS),
|
||||
});
|
||||
|
||||
if (!res.ok) {
|
||||
console.warn(`[website-extractor] extract failed ${res.status}: ${url}`);
|
||||
return null;
|
||||
}
|
||||
|
||||
const data = (await res.json()) as ExtractResponse;
|
||||
if (!data.success || !data.data?.content) return null;
|
||||
|
||||
// Prefer markdown > text > html
|
||||
const text = data.data.content.markdown || data.data.content.text || '';
|
||||
if (text.length < 50) return null; // Too short to contain events
|
||||
|
||||
// Trim to stay within LLM context window
|
||||
return text.slice(0, MAX_CONTENT_CHARS);
|
||||
} catch (err) {
|
||||
console.warn(`[website-extractor] fetch error for ${url}:`, err);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/** Build the LLM system prompt for event extraction. */
|
||||
function buildExtractionPrompt(): string {
|
||||
const today = new Date().toISOString().slice(0, 10);
|
||||
return `Du bist ein Event-Extractor. Extrahiere ALLE kommenden Veranstaltungen von der gegebenen Webseite.
|
||||
|
||||
Pro Event liefere:
|
||||
- title (string, Pflicht) — Name der Veranstaltung
|
||||
- date (string, Pflicht) — Startdatum im Format YYYY-MM-DD
|
||||
- time (string, optional) — Startzeit im Format HH:MM
|
||||
- endDate (string, optional) — Enddatum falls mehrtägig
|
||||
- endTime (string, optional) — Endzeit
|
||||
- location (string, optional) — Veranstaltungsort / Adresse
|
||||
- description (string, optional) — Kurzbeschreibung, max 300 Zeichen
|
||||
- category (string, optional) — Eine von: music, theater, art, tech, sport, food, family, nature, education, community, nightlife, market, other
|
||||
- priceInfo (string, optional) — Preis, z.B. "Eintritt frei", "15 EUR", "VVK 12 / AK 15"
|
||||
|
||||
Heutiges Datum: ${today}
|
||||
Ignoriere vergangene Events (vor ${today}).
|
||||
Antwort als JSON-Objekt mit einem "events"-Array. Kein Markdown, nur JSON.`;
|
||||
}
|
||||
|
||||
/** Send page content to mana-llm for structured event extraction. */
|
||||
async function llmExtractEvents(
|
||||
pageContent: string,
|
||||
sourceUrl: string,
|
||||
sourceName: string,
|
||||
manaLlmUrl: string
|
||||
): Promise<NormalizedEvent[]> {
|
||||
try {
|
||||
const res = await fetch(`${manaLlmUrl}/v1/chat/completions`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({
|
||||
model: 'ollama/gemma3:4b',
|
||||
messages: [
|
||||
{ role: 'system', content: buildExtractionPrompt() },
|
||||
{ role: 'user', content: `Extrahiere Events von dieser Seite:\n\n${pageContent}` },
|
||||
],
|
||||
max_tokens: 2048,
|
||||
temperature: 0,
|
||||
response_format: { type: 'json_object' },
|
||||
}),
|
||||
signal: AbortSignal.timeout(LLM_TIMEOUT_MS),
|
||||
});
|
||||
|
||||
if (!res.ok) {
|
||||
console.warn(`[website-extractor] LLM failed ${res.status}`);
|
||||
return [];
|
||||
}
|
||||
|
||||
const completion = (await res.json()) as ChatCompletionResponse;
|
||||
const rawJson = completion.choices?.[0]?.message?.content ?? '';
|
||||
|
||||
return parseExtractedEvents(rawJson, sourceUrl, sourceName);
|
||||
} catch (err) {
|
||||
console.warn(`[website-extractor] LLM error:`, err);
|
||||
return [];
|
||||
}
|
||||
}
|
||||
|
||||
/** Parse and validate LLM JSON output into NormalizedEvents. */
|
||||
export function parseExtractedEvents(
|
||||
rawJson: string,
|
||||
sourceUrl: string,
|
||||
sourceName: string
|
||||
): NormalizedEvent[] {
|
||||
try {
|
||||
// Strip markdown fences if present
|
||||
const cleaned = rawJson.replace(/^```(?:json)?\s*\n?/m, '').replace(/\n?```\s*$/m, '');
|
||||
const parsed = JSON.parse(cleaned);
|
||||
const rawEvents = parsed.events ?? parsed;
|
||||
|
||||
if (!Array.isArray(rawEvents)) return [];
|
||||
|
||||
const now = new Date();
|
||||
const events: NormalizedEvent[] = [];
|
||||
|
||||
for (const raw of rawEvents) {
|
||||
if (!raw.title || !raw.date) continue;
|
||||
|
||||
// Parse date — LLMs sometimes return "25. April 2026" instead of ISO
|
||||
const startAt = parseFlexibleDate(raw.date, raw.time);
|
||||
if (!startAt || isNaN(startAt.getTime())) continue;
|
||||
|
||||
// Skip past events
|
||||
if (startAt.getTime() < now.getTime() - 24 * 60 * 60 * 1000) continue;
|
||||
|
||||
const endAt = raw.endDate ? parseFlexibleDate(raw.endDate, raw.endTime) : null;
|
||||
|
||||
events.push({
|
||||
title: String(raw.title).trim().slice(0, 200),
|
||||
description: raw.description ? String(raw.description).trim().slice(0, 2000) : null,
|
||||
location: raw.location ? String(raw.location).trim() : null,
|
||||
startAt,
|
||||
endAt,
|
||||
allDay: !raw.time,
|
||||
sourceUrl,
|
||||
category: raw.category ?? null,
|
||||
priceInfo: raw.priceInfo ? String(raw.priceInfo).trim() : null,
|
||||
});
|
||||
}
|
||||
|
||||
return events;
|
||||
} catch (err) {
|
||||
console.warn(`[website-extractor] JSON parse error:`, err);
|
||||
return [];
|
||||
}
|
||||
}
|
||||
|
||||
/** Parse dates flexibly — handles ISO, German formats, and partial dates. */
|
||||
function parseFlexibleDate(dateStr: string, timeStr?: string): Date | null {
|
||||
if (!dateStr) return null;
|
||||
|
||||
// Try ISO format first (YYYY-MM-DD)
|
||||
const isoMatch = dateStr.match(/^(\d{4})-(\d{2})-(\d{2})/);
|
||||
if (isoMatch) {
|
||||
const [, y, m, d] = isoMatch;
|
||||
const time = parseTime(timeStr);
|
||||
return new Date(`${y}-${m}-${d}T${time}:00`);
|
||||
}
|
||||
|
||||
// Try German format (DD.MM.YYYY)
|
||||
const deMatch = dateStr.match(/(\d{1,2})\.(\d{1,2})\.(\d{4})/);
|
||||
if (deMatch) {
|
||||
const [, d, m, y] = deMatch;
|
||||
const time = parseTime(timeStr);
|
||||
return new Date(`${y}-${m!.padStart(2, '0')}-${d!.padStart(2, '0')}T${time}:00`);
|
||||
}
|
||||
|
||||
// Fallback: let Date parse it
|
||||
try {
|
||||
const d = new Date(dateStr);
|
||||
if (!isNaN(d.getTime())) return d;
|
||||
} catch {
|
||||
// ignore
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
function parseTime(timeStr?: string): string {
|
||||
if (!timeStr) return '00:00';
|
||||
const match = timeStr.match(/(\d{1,2}):(\d{2})/);
|
||||
if (match) return `${match[1]!.padStart(2, '0')}:${match[2]}`;
|
||||
return '00:00';
|
||||
}
|
||||
|
|
@ -14,6 +14,7 @@ import { createApp } from './app';
|
|||
import { loadConfig } from './config';
|
||||
import { getDb } from './db/connection';
|
||||
import { startRateBucketSweeper } from './lib/cleanup';
|
||||
import { startCrawlScheduler } from './discovery/crawl-scheduler';
|
||||
|
||||
const config = loadConfig();
|
||||
const db = getDb(config.databaseUrl);
|
||||
|
|
@ -22,6 +23,12 @@ const db = getDb(config.databaseUrl);
|
|||
// accumulate for the lifetime of long-published events.
|
||||
startRateBucketSweeper(db);
|
||||
|
||||
// Event discovery — crawl sources (iCal feeds, websites) every 15 minutes.
|
||||
startCrawlScheduler(db, {
|
||||
manaResearchUrl: config.manaResearchUrl,
|
||||
manaLlmUrl: config.manaLlmUrl,
|
||||
});
|
||||
|
||||
console.log(`mana-events starting on port ${config.port}...`);
|
||||
|
||||
export default {
|
||||
|
|
|
|||
136
services/mana-events/src/routes/discovery-feed.ts
Normal file
136
services/mana-events/src/routes/discovery-feed.ts
Normal file
|
|
@ -0,0 +1,136 @@
|
|||
/**
|
||||
* Discovery feed routes — paginated event feed + user actions.
|
||||
*
|
||||
* The feed endpoint returns discovered events sorted by start date,
|
||||
* filtered by date range and optional category, enriched with the
|
||||
* user's action status (saved/dismissed/null).
|
||||
*/
|
||||
|
||||
import { Hono } from 'hono';
|
||||
import { z } from 'zod';
|
||||
import { and, eq, gte, lte, sql, isNull, desc, asc } from 'drizzle-orm';
|
||||
import type { Database } from '../db/connection';
|
||||
import { discoveredEvents, discoveryUserActions, discoverySources } from '../db/schema/discovery';
|
||||
import { BadRequestError } from '../lib/errors';
|
||||
import type { AuthUser } from '../middleware/jwt-auth';
|
||||
|
||||
const feedQuerySchema = z.object({
|
||||
from: z.string().datetime().optional(),
|
||||
to: z.string().datetime().optional(),
|
||||
category: z.string().max(50).optional(),
|
||||
limit: z.coerce.number().int().min(1).max(100).optional(),
|
||||
offset: z.coerce.number().int().min(0).optional(),
|
||||
hideDismissed: z.coerce.boolean().optional(),
|
||||
});
|
||||
|
||||
const actionSchema = z.object({
|
||||
action: z.enum(['save', 'dismiss']),
|
||||
});
|
||||
|
||||
export function createDiscoveryFeedRoutes(db: Database) {
|
||||
const app = new Hono<{ Variables: { user: AuthUser } }>();
|
||||
|
||||
/**
|
||||
* GET /feed — paginated discovered events.
|
||||
*
|
||||
* Joins with user actions to include save/dismiss status.
|
||||
* Only shows events from the current user's sources.
|
||||
*/
|
||||
app.get('/feed', async (c) => {
|
||||
const user = c.get('user');
|
||||
const query = feedQuerySchema.safeParse(c.req.query());
|
||||
if (!query.success) throw new BadRequestError(query.error.issues[0]?.message ?? 'Invalid');
|
||||
|
||||
const { from, to, category, hideDismissed } = query.data;
|
||||
const limit = query.data.limit ?? 20;
|
||||
const offset = query.data.offset ?? 0;
|
||||
|
||||
// Build WHERE conditions
|
||||
const conditions = [
|
||||
// Only events from this user's sources
|
||||
eq(discoverySources.userId, user.userId),
|
||||
eq(discoverySources.isActive, true),
|
||||
// Only future events (or today)
|
||||
gte(discoveredEvents.startAt, from ? new Date(from) : new Date()),
|
||||
];
|
||||
|
||||
if (to) {
|
||||
conditions.push(lte(discoveredEvents.startAt, new Date(to)));
|
||||
}
|
||||
if (category) {
|
||||
conditions.push(eq(discoveredEvents.category, category));
|
||||
}
|
||||
|
||||
// Query events with left join on user actions
|
||||
const rows = await db
|
||||
.select({
|
||||
id: discoveredEvents.id,
|
||||
title: discoveredEvents.title,
|
||||
description: discoveredEvents.description,
|
||||
location: discoveredEvents.location,
|
||||
lat: discoveredEvents.lat,
|
||||
lon: discoveredEvents.lon,
|
||||
startAt: discoveredEvents.startAt,
|
||||
endAt: discoveredEvents.endAt,
|
||||
allDay: discoveredEvents.allDay,
|
||||
imageUrl: discoveredEvents.imageUrl,
|
||||
sourceUrl: discoveredEvents.sourceUrl,
|
||||
sourceName: discoveredEvents.sourceName,
|
||||
category: discoveredEvents.category,
|
||||
priceInfo: discoveredEvents.priceInfo,
|
||||
crawledAt: discoveredEvents.crawledAt,
|
||||
userAction: discoveryUserActions.action,
|
||||
})
|
||||
.from(discoveredEvents)
|
||||
.innerJoin(discoverySources, eq(discoveredEvents.sourceId, discoverySources.id))
|
||||
.leftJoin(
|
||||
discoveryUserActions,
|
||||
and(
|
||||
eq(discoveryUserActions.eventId, discoveredEvents.id),
|
||||
eq(discoveryUserActions.userId, user.userId)
|
||||
)
|
||||
)
|
||||
.where(and(...conditions))
|
||||
.orderBy(asc(discoveredEvents.startAt))
|
||||
.limit(limit + 1) // fetch one extra to determine hasMore
|
||||
.offset(offset);
|
||||
|
||||
// Filter dismissed events client-side if requested
|
||||
const filtered = hideDismissed ? rows.filter((r) => r.userAction !== 'dismiss') : rows;
|
||||
|
||||
const hasMore = filtered.length > limit;
|
||||
const events = filtered.slice(0, limit);
|
||||
|
||||
return c.json({ events, total: events.length, hasMore });
|
||||
});
|
||||
|
||||
/**
|
||||
* POST /feed/:eventId/action — save or dismiss a discovered event.
|
||||
*/
|
||||
app.post('/feed/:eventId/action', async (c) => {
|
||||
const user = c.get('user');
|
||||
const eventId = c.req.param('eventId');
|
||||
const body = await c.req.json().catch(() => null);
|
||||
const parsed = actionSchema.safeParse(body);
|
||||
if (!parsed.success) throw new BadRequestError(parsed.error.issues[0]?.message ?? 'Invalid');
|
||||
|
||||
await db
|
||||
.insert(discoveryUserActions)
|
||||
.values({
|
||||
userId: user.userId,
|
||||
eventId,
|
||||
action: parsed.data.action,
|
||||
})
|
||||
.onConflictDoUpdate({
|
||||
target: [discoveryUserActions.userId, discoveryUserActions.eventId],
|
||||
set: {
|
||||
action: parsed.data.action,
|
||||
actedAt: new Date(),
|
||||
},
|
||||
});
|
||||
|
||||
return c.json({ ok: true });
|
||||
});
|
||||
|
||||
return app;
|
||||
}
|
||||
299
services/mana-events/src/routes/discovery.ts
Normal file
299
services/mana-events/src/routes/discovery.ts
Normal file
|
|
@ -0,0 +1,299 @@
|
|||
/**
|
||||
* Discovery CRUD routes — JWT-authenticated.
|
||||
*
|
||||
* Manages regions, interests, and sources for a user's event discovery setup.
|
||||
*/
|
||||
|
||||
import { Hono } from 'hono';
|
||||
import { z } from 'zod';
|
||||
import { and, eq } from 'drizzle-orm';
|
||||
import type { Database } from '../db/connection';
|
||||
import { discoveryRegions, discoveryInterests, discoverySources } from '../db/schema/discovery';
|
||||
import { EVENT_CATEGORIES } from '../discovery/types';
|
||||
import { crawlSourceNow } from '../discovery/crawl-scheduler';
|
||||
import { discoverSourcesForRegion } from '../discovery/source-discoverer';
|
||||
import { BadRequestError, ForbiddenError, NotFoundError } from '../lib/errors';
|
||||
import type { AuthUser } from '../middleware/jwt-auth';
|
||||
import type { Config } from '../config';
|
||||
|
||||
// ─── Validation schemas ─────────────────────────────────────────────
|
||||
|
||||
const regionCreateSchema = z.object({
|
||||
label: z.string().min(1).max(200),
|
||||
lat: z.number().min(-90).max(90),
|
||||
lon: z.number().min(-180).max(180),
|
||||
radiusKm: z.number().int().min(1).max(200).optional(),
|
||||
});
|
||||
|
||||
const regionUpdateSchema = z.object({
|
||||
label: z.string().min(1).max(200).optional(),
|
||||
radiusKm: z.number().int().min(1).max(200).optional(),
|
||||
isActive: z.boolean().optional(),
|
||||
});
|
||||
|
||||
const interestCreateSchema = z.object({
|
||||
category: z.string().min(1).max(50),
|
||||
freetext: z.string().max(200).nullable().optional(),
|
||||
weight: z.number().min(0.1).max(5).optional(),
|
||||
});
|
||||
|
||||
const sourceCreateSchema = z.object({
|
||||
type: z.enum(['ical', 'website']),
|
||||
url: z.string().url().max(2000),
|
||||
name: z.string().min(1).max(200),
|
||||
regionId: z.string().uuid(),
|
||||
crawlIntervalHours: z.number().int().min(1).max(168).optional(), // max 7 days
|
||||
});
|
||||
|
||||
// ─── Routes ─────────────────────────────────────────────────────────
|
||||
|
||||
export function createDiscoveryRoutes(db: Database, config?: Config) {
|
||||
const app = new Hono<{ Variables: { user: AuthUser } }>();
|
||||
|
||||
// ── Regions ──────────────────────────────────────────────────
|
||||
|
||||
app.get('/regions', async (c) => {
|
||||
const user = c.get('user');
|
||||
const regions = await db
|
||||
.select()
|
||||
.from(discoveryRegions)
|
||||
.where(eq(discoveryRegions.userId, user.userId));
|
||||
return c.json({ regions });
|
||||
});
|
||||
|
||||
app.post('/regions', async (c) => {
|
||||
const user = c.get('user');
|
||||
const body = await c.req.json().catch(() => null);
|
||||
const parsed = regionCreateSchema.safeParse(body);
|
||||
if (!parsed.success) throw new BadRequestError(parsed.error.issues[0]?.message ?? 'Invalid');
|
||||
|
||||
const [region] = await db
|
||||
.insert(discoveryRegions)
|
||||
.values({
|
||||
userId: user.userId,
|
||||
label: parsed.data.label,
|
||||
lat: parsed.data.lat,
|
||||
lon: parsed.data.lon,
|
||||
radiusKm: parsed.data.radiusKm ?? 25,
|
||||
})
|
||||
.returning();
|
||||
return c.json({ region }, 201);
|
||||
});
|
||||
|
||||
app.put('/regions/:id', async (c) => {
|
||||
const user = c.get('user');
|
||||
const id = c.req.param('id');
|
||||
const body = await c.req.json().catch(() => null);
|
||||
const parsed = regionUpdateSchema.safeParse(body);
|
||||
if (!parsed.success) throw new BadRequestError(parsed.error.issues[0]?.message ?? 'Invalid');
|
||||
|
||||
const existing = await db
|
||||
.select()
|
||||
.from(discoveryRegions)
|
||||
.where(and(eq(discoveryRegions.id, id), eq(discoveryRegions.userId, user.userId)))
|
||||
.limit(1);
|
||||
if (!existing[0]) throw new NotFoundError('Region not found');
|
||||
|
||||
const updates: Partial<typeof discoveryRegions.$inferInsert> = {};
|
||||
if (parsed.data.label !== undefined) updates.label = parsed.data.label;
|
||||
if (parsed.data.radiusKm !== undefined) updates.radiusKm = parsed.data.radiusKm;
|
||||
if (parsed.data.isActive !== undefined) updates.isActive = parsed.data.isActive;
|
||||
|
||||
const [region] = await db
|
||||
.update(discoveryRegions)
|
||||
.set(updates)
|
||||
.where(eq(discoveryRegions.id, id))
|
||||
.returning();
|
||||
return c.json({ region });
|
||||
});
|
||||
|
||||
app.delete('/regions/:id', async (c) => {
|
||||
const user = c.get('user');
|
||||
const id = c.req.param('id');
|
||||
const existing = await db
|
||||
.select()
|
||||
.from(discoveryRegions)
|
||||
.where(and(eq(discoveryRegions.id, id), eq(discoveryRegions.userId, user.userId)))
|
||||
.limit(1);
|
||||
if (!existing[0]) throw new NotFoundError('Region not found');
|
||||
|
||||
await db.delete(discoveryRegions).where(eq(discoveryRegions.id, id));
|
||||
return c.json({ deleted: true });
|
||||
});
|
||||
|
||||
// ── Interests ────────────────────────────────────────────────
|
||||
|
||||
app.get('/interests', async (c) => {
|
||||
const user = c.get('user');
|
||||
const interests = await db
|
||||
.select()
|
||||
.from(discoveryInterests)
|
||||
.where(eq(discoveryInterests.userId, user.userId));
|
||||
return c.json({ interests });
|
||||
});
|
||||
|
||||
app.post('/interests', async (c) => {
|
||||
const user = c.get('user');
|
||||
const body = await c.req.json().catch(() => null);
|
||||
const parsed = interestCreateSchema.safeParse(body);
|
||||
if (!parsed.success) throw new BadRequestError(parsed.error.issues[0]?.message ?? 'Invalid');
|
||||
|
||||
const [interest] = await db
|
||||
.insert(discoveryInterests)
|
||||
.values({
|
||||
userId: user.userId,
|
||||
category: parsed.data.category,
|
||||
freetext: parsed.data.freetext ?? null,
|
||||
weight: parsed.data.weight ?? 1.0,
|
||||
})
|
||||
.returning();
|
||||
return c.json({ interest }, 201);
|
||||
});
|
||||
|
||||
app.delete('/interests/:id', async (c) => {
|
||||
const user = c.get('user');
|
||||
const id = c.req.param('id');
|
||||
const existing = await db
|
||||
.select()
|
||||
.from(discoveryInterests)
|
||||
.where(and(eq(discoveryInterests.id, id), eq(discoveryInterests.userId, user.userId)))
|
||||
.limit(1);
|
||||
if (!existing[0]) throw new NotFoundError('Interest not found');
|
||||
|
||||
await db.delete(discoveryInterests).where(eq(discoveryInterests.id, id));
|
||||
return c.json({ deleted: true });
|
||||
});
|
||||
|
||||
// ── Sources ──────────────────────────────────────────────────
|
||||
|
||||
app.get('/sources', async (c) => {
|
||||
const user = c.get('user');
|
||||
const sources = await db
|
||||
.select()
|
||||
.from(discoverySources)
|
||||
.where(eq(discoverySources.userId, user.userId));
|
||||
return c.json({ sources });
|
||||
});
|
||||
|
||||
app.post('/sources', async (c) => {
|
||||
const user = c.get('user');
|
||||
const body = await c.req.json().catch(() => null);
|
||||
const parsed = sourceCreateSchema.safeParse(body);
|
||||
if (!parsed.success) throw new BadRequestError(parsed.error.issues[0]?.message ?? 'Invalid');
|
||||
|
||||
// Verify the region belongs to this user
|
||||
const region = await db
|
||||
.select()
|
||||
.from(discoveryRegions)
|
||||
.where(
|
||||
and(eq(discoveryRegions.id, parsed.data.regionId), eq(discoveryRegions.userId, user.userId))
|
||||
)
|
||||
.limit(1);
|
||||
if (!region[0]) throw new BadRequestError('Region not found');
|
||||
|
||||
const [source] = await db
|
||||
.insert(discoverySources)
|
||||
.values({
|
||||
userId: user.userId,
|
||||
type: parsed.data.type,
|
||||
url: parsed.data.url,
|
||||
name: parsed.data.name,
|
||||
regionId: parsed.data.regionId,
|
||||
crawlIntervalHours: parsed.data.crawlIntervalHours ?? 24,
|
||||
})
|
||||
.returning();
|
||||
return c.json({ source }, 201);
|
||||
});
|
||||
|
||||
app.delete('/sources/:id', async (c) => {
|
||||
const user = c.get('user');
|
||||
const id = c.req.param('id');
|
||||
const existing = await db
|
||||
.select()
|
||||
.from(discoverySources)
|
||||
.where(and(eq(discoverySources.id, id), eq(discoverySources.userId, user.userId)))
|
||||
.limit(1);
|
||||
if (!existing[0]) throw new NotFoundError('Source not found');
|
||||
|
||||
await db.delete(discoverySources).where(eq(discoverySources.id, id));
|
||||
return c.json({ deleted: true });
|
||||
});
|
||||
|
||||
// Trigger an immediate crawl for a source
|
||||
app.post('/sources/:id/crawl', async (c) => {
|
||||
const user = c.get('user');
|
||||
const id = c.req.param('id');
|
||||
const existing = await db
|
||||
.select()
|
||||
.from(discoverySources)
|
||||
.where(and(eq(discoverySources.id, id), eq(discoverySources.userId, user.userId)))
|
||||
.limit(1);
|
||||
if (!existing[0]) throw new NotFoundError('Source not found');
|
||||
|
||||
const crawlConfig = config
|
||||
? { manaResearchUrl: config.manaResearchUrl, manaLlmUrl: config.manaLlmUrl }
|
||||
: undefined;
|
||||
const result = await crawlSourceNow(db, id, crawlConfig);
|
||||
return c.json(result);
|
||||
});
|
||||
|
||||
// ── Source Discovery (Phase 2) ───────────────────────────────
|
||||
|
||||
// Auto-discover event sources for a region via web search
|
||||
app.post('/regions/:id/discover-sources', async (c) => {
|
||||
const user = c.get('user');
|
||||
const regionId = c.req.param('id');
|
||||
if (!config) throw new BadRequestError('Source discovery not configured');
|
||||
|
||||
const result = await discoverSourcesForRegion(
|
||||
db,
|
||||
regionId,
|
||||
user.userId,
|
||||
config.manaResearchUrl
|
||||
);
|
||||
return c.json(result);
|
||||
});
|
||||
|
||||
// Activate a suggested source
|
||||
app.put('/sources/:id/activate', async (c) => {
|
||||
const user = c.get('user');
|
||||
const id = c.req.param('id');
|
||||
const existing = await db
|
||||
.select()
|
||||
.from(discoverySources)
|
||||
.where(and(eq(discoverySources.id, id), eq(discoverySources.userId, user.userId)))
|
||||
.limit(1);
|
||||
if (!existing[0]) throw new NotFoundError('Source not found');
|
||||
|
||||
const [source] = await db
|
||||
.update(discoverySources)
|
||||
.set({ isActive: true, updatedAt: new Date() })
|
||||
.where(eq(discoverySources.id, id))
|
||||
.returning();
|
||||
|
||||
// Trigger immediate crawl for the newly activated source
|
||||
const crawlConfig = config
|
||||
? { manaResearchUrl: config.manaResearchUrl, manaLlmUrl: config.manaLlmUrl }
|
||||
: undefined;
|
||||
crawlSourceNow(db, id, crawlConfig).catch(() => {});
|
||||
|
||||
return c.json({ source });
|
||||
});
|
||||
|
||||
// Reject a suggested source
|
||||
app.delete('/sources/:id/reject', async (c) => {
|
||||
const user = c.get('user');
|
||||
const id = c.req.param('id');
|
||||
const existing = await db
|
||||
.select()
|
||||
.from(discoverySources)
|
||||
.where(and(eq(discoverySources.id, id), eq(discoverySources.userId, user.userId)))
|
||||
.limit(1);
|
||||
if (!existing[0]) throw new NotFoundError('Source not found');
|
||||
|
||||
await db.delete(discoverySources).where(eq(discoverySources.id, id));
|
||||
return c.json({ deleted: true });
|
||||
});
|
||||
|
||||
return app;
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue