mirror of
https://github.com/Memo-2023/mana-monorepo.git
synced 2026-05-28 21:12:53 +02:00
Phase 1: Manual iCal feeds + Discovery tab - 5 new DB tables in event_discovery schema (regions, interests, sources, discovered_events, user_actions) - iCal parser (node-ical) with deduplication (SHA-256 hash) - Crawl scheduler (15-min interval, auto-deactivate after 5 errors) - CRUD routes for regions, interests, sources + paginated feed endpoint - Frontend: "Meine Events" / "Entdecken" tab navigation in ListView - Discovery setup wizard (regions via mana-geocoding + interests) - DiscoveredEventCard with save/dismiss, SourceManager for iCal feeds - "Merken" creates a local socialEvent from discovered event Phase 2: Auto source discovery + LLM extraction + relevance scoring - Source discoverer: web search via mana-research to auto-find iCal feeds and venue websites for a region - Website extractor: crawl via mana-research /extract, then LLM-based event extraction via mana-llm with structured JSON output - Flexible date parsing (ISO, DD.MM.YYYY), markdown fence stripping - Relevance scorer: category match, freetext match, haversine distance, time proximity, weekend bonus (0-100 clamped) - Routes: POST regions/:id/discover-sources, PUT/DELETE sources/:id/activate|reject - Frontend: "Automatisch finden" button, suggested vs active sources UI 107 tests (all passing), no regressions. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
144 lines
4.1 KiB
TypeScript
144 lines
4.1 KiB
TypeScript
/**
|
|
* Website extractor unit tests — tests the JSON parsing and date handling
|
|
* without hitting real LLM or mana-research services.
|
|
*/
|
|
|
|
import { describe, it, expect } from 'bun:test';
|
|
import { parseExtractedEvents } from '../discovery/website-extractor';
|
|
|
|
const SOURCE_URL = 'https://jazzhaus.de/programm';
|
|
const SOURCE_NAME = 'Jazzhaus Freiburg';
|
|
|
|
describe('parseExtractedEvents', () => {
|
|
it('parses a well-formed JSON response', () => {
|
|
const futureYear = new Date().getFullYear() + 1;
|
|
const json = JSON.stringify({
|
|
events: [
|
|
{
|
|
title: 'Jazz Night',
|
|
date: `${futureYear}-05-15`,
|
|
time: '20:00',
|
|
location: 'Jazzhaus Freiburg',
|
|
category: 'music',
|
|
priceInfo: '15 EUR',
|
|
},
|
|
{
|
|
title: 'Rock Festival',
|
|
date: `${futureYear}-06-20`,
|
|
location: 'Stadtpark',
|
|
category: 'music',
|
|
},
|
|
],
|
|
});
|
|
|
|
const events = parseExtractedEvents(json, SOURCE_URL, SOURCE_NAME);
|
|
expect(events).toHaveLength(2);
|
|
expect(events[0].title).toBe('Jazz Night');
|
|
expect(events[0].location).toBe('Jazzhaus Freiburg');
|
|
expect(events[0].category).toBe('music');
|
|
expect(events[0].priceInfo).toBe('15 EUR');
|
|
expect(events[0].allDay).toBe(false); // has time
|
|
expect(events[1].title).toBe('Rock Festival');
|
|
expect(events[1].allDay).toBe(true); // no time
|
|
});
|
|
|
|
it('handles markdown-fenced JSON', () => {
|
|
const futureYear = new Date().getFullYear() + 1;
|
|
const json = `\`\`\`json
|
|
{
|
|
"events": [
|
|
{"title": "Test", "date": "${futureYear}-03-01", "time": "19:00"}
|
|
]
|
|
}
|
|
\`\`\``;
|
|
|
|
const events = parseExtractedEvents(json, SOURCE_URL, SOURCE_NAME);
|
|
expect(events).toHaveLength(1);
|
|
expect(events[0].title).toBe('Test');
|
|
});
|
|
|
|
it('parses German date format (DD.MM.YYYY)', () => {
|
|
const futureYear = new Date().getFullYear() + 1;
|
|
const json = JSON.stringify({
|
|
events: [{ title: 'Fest', date: `15.06.${futureYear}`, time: '18:00' }],
|
|
});
|
|
|
|
const events = parseExtractedEvents(json, SOURCE_URL, SOURCE_NAME);
|
|
expect(events).toHaveLength(1);
|
|
expect(events[0].startAt.getFullYear()).toBe(futureYear);
|
|
expect(events[0].startAt.getMonth()).toBe(5); // June = 5
|
|
});
|
|
|
|
it('filters out past events', () => {
|
|
const json = JSON.stringify({
|
|
events: [
|
|
{ title: 'Past Event', date: '2020-01-01', time: '19:00' },
|
|
{ title: 'Future Event', date: '2030-01-01', time: '19:00' },
|
|
],
|
|
});
|
|
|
|
const events = parseExtractedEvents(json, SOURCE_URL, SOURCE_NAME);
|
|
expect(events).toHaveLength(1);
|
|
expect(events[0].title).toBe('Future Event');
|
|
});
|
|
|
|
it('skips events without title or date', () => {
|
|
const json = JSON.stringify({
|
|
events: [
|
|
{ title: 'Valid', date: '2030-01-01' },
|
|
{ title: '', date: '2030-01-02' },
|
|
{ title: 'No Date' },
|
|
{ date: '2030-01-03' },
|
|
],
|
|
});
|
|
|
|
const events = parseExtractedEvents(json, SOURCE_URL, SOURCE_NAME);
|
|
expect(events).toHaveLength(1);
|
|
expect(events[0].title).toBe('Valid');
|
|
});
|
|
|
|
it('truncates long titles and descriptions', () => {
|
|
const json = JSON.stringify({
|
|
events: [
|
|
{
|
|
title: 'A'.repeat(300),
|
|
date: '2030-01-01',
|
|
description: 'B'.repeat(3000),
|
|
},
|
|
],
|
|
});
|
|
|
|
const events = parseExtractedEvents(json, SOURCE_URL, SOURCE_NAME);
|
|
expect(events[0].title.length).toBe(200);
|
|
expect(events[0].description!.length).toBe(2000);
|
|
});
|
|
|
|
it('handles empty/invalid JSON gracefully', () => {
|
|
expect(parseExtractedEvents('', SOURCE_URL, SOURCE_NAME)).toHaveLength(0);
|
|
expect(parseExtractedEvents('not json', SOURCE_URL, SOURCE_NAME)).toHaveLength(0);
|
|
expect(parseExtractedEvents('{}', SOURCE_URL, SOURCE_NAME)).toHaveLength(0);
|
|
expect(parseExtractedEvents('{"events": "not array"}', SOURCE_URL, SOURCE_NAME)).toHaveLength(
|
|
0
|
|
);
|
|
});
|
|
|
|
it('handles endDate and endTime', () => {
|
|
const futureYear = new Date().getFullYear() + 1;
|
|
const json = JSON.stringify({
|
|
events: [
|
|
{
|
|
title: 'Festival',
|
|
date: `${futureYear}-07-01`,
|
|
time: '10:00',
|
|
endDate: `${futureYear}-07-03`,
|
|
endTime: '23:00',
|
|
},
|
|
],
|
|
});
|
|
|
|
const events = parseExtractedEvents(json, SOURCE_URL, SOURCE_NAME);
|
|
expect(events).toHaveLength(1);
|
|
expect(events[0].endAt).not.toBeNull();
|
|
expect(events[0].endAt!.getDate()).toBe(3);
|
|
});
|
|
});
|