managarten/packages/shared-rss/src/parse.ts
Till JS b768a0ffce refactor(shared-rss): extract RSS parsing + Readability into one package
news-ingester and apps/api both shipped their own copy of rss-parser
+ jsdom + Readability glue. Single source now in packages/shared-rss.
Adds discoverFeeds (rel=alternate + common-paths probe) and validateFeed
which News Research will use. JSDOM virtualConsole is silenced once,
in the package, instead of in two parallel call sites.

- packages/shared-rss: parse, extract, discover, validate
- services/news-ingester: drop local parsers, depend on @mana/shared-rss
- apps/api: drop @mozilla/readability + jsdom direct deps, use shared

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-15 22:30:44 +02:00

65 lines
1.7 KiB
TypeScript

import Parser from 'rss-parser';
import { DEFAULT_USER_AGENT, type NormalizedFeedItem } from './types';
/**
 * Extra per-item fields this module reads in addition to the standard item
 * fields. The same three names are requested from the parser through its
 * `customFields.item` option.
 */
type CustomItem = {
// `$` presumably holds the element's XML attributes — confirm against rss-parser/xml2js output.
'media:content'?: { $: { url: string } };
'media:thumbnail'?: { $: { url: string } };
// Unlike the media:* fields, the URL is read directly from the object (no `$` wrapper).
enclosure?: { url?: string };
};
/**
 * Single rss-parser instance used by every parse helper in this module.
 * Items are typed as `CustomItem`; the feed-level type is left as `unknown`.
 */
const parser = new Parser<unknown, CustomItem>({
  // Ask the parser to expose these elements on each item; mapItem reads image URLs from them.
  customFields: {
    item: ['media:content', 'media:thumbnail', 'enclosure'],
  },
  // Identify ourselves with the package-wide default user agent.
  headers: { 'User-Agent': DEFAULT_USER_AGENT },
  // Request timeout (rss-parser documents this option in milliseconds, i.e. 15 s).
  timeout: 15_000,
});
/**
 * Converts one raw parser item into a NormalizedFeedItem.
 *
 * Field mapping:
 * - `url` / `title` fall back to the empty string when absent.
 * - `excerpt` and `content` both carry the item's `contentSnippet`;
 *   `htmlContent` carries the item's `content`.
 * - `author` prefers `creator` and falls back to `author`.
 * - `imageUrl` is the first usable candidate among `media:content`,
 *   `media:thumbnail` and `enclosure`, in that order. A candidate is skipped
 *   when its URL is missing/empty or when it explicitly declares a non-image
 *   `medium` or MIME `type` (for example a podcast's audio enclosure), so a
 *   media file is never reported as an image.
 * - `publishedAt` is null when `isoDate` is missing or does not parse to a
 *   valid date (never an Invalid Date object).
 *
 * @param item Raw item; `null`/`undefined` is treated as an item with no fields.
 * @returns The normalized item.
 */
function mapItem(item: unknown): NormalizedFeedItem {
  type MediaAttrs = { url?: string; type?: string; medium?: string };

  const raw = (item ?? {}) as {
    'media:content'?: { $?: MediaAttrs };
    'media:thumbnail'?: { $?: MediaAttrs };
    enclosure?: MediaAttrs;
    link?: string;
    title?: string;
    content?: string;
    contentSnippet?: string;
    creator?: string;
    author?: string;
    isoDate?: string;
  };

  // Normalizes an optional attribute; anything that is not a string counts as "not declared".
  const declared = (value: unknown): string =>
    typeof value === 'string' ? value.trim().toLowerCase() : '';

  // Returns the candidate's URL only if nothing on it says it is not an image.
  const imageUrlOf = (media: MediaAttrs | undefined): string | null => {
    if (!media || typeof media.url !== 'string' || media.url === '') return null;
    const medium = declared(media.medium);
    if (medium !== '' && medium !== 'image') return null;
    const mime = declared(media.type);
    if (mime !== '' && !mime.startsWith('image/')) return null;
    return media.url;
  };

  const imageUrl =
    imageUrlOf(raw['media:content']?.$) ??
    imageUrlOf(raw['media:thumbnail']?.$) ??
    imageUrlOf(raw.enclosure);

  // `new Date(garbage)` does not throw; it yields an Invalid Date whose time is NaN.
  const parsedDate = raw.isoDate ? new Date(raw.isoDate) : null;
  const publishedAt =
    parsedDate !== null && !Number.isNaN(parsedDate.getTime()) ? parsedDate : null;

  return {
    url: raw.link ?? '',
    title: raw.title ?? '',
    excerpt: raw.contentSnippet ?? null,
    content: raw.contentSnippet ?? null,
    htmlContent: raw.content ?? null,
    author: raw.creator ?? raw.author ?? null,
    imageUrl,
    publishedAt,
  };
}
/**
 * Loads the feed at `url` through the shared parser and returns its items in
 * normalized form.
 *
 * @param url Location of the feed to parse.
 * @returns One NormalizedFeedItem per feed item; empty when the feed has no items.
 *   Rejects with whatever error the parser's `parseURL` rejects with.
 */
export async function parseFeedUrl(url: string): Promise<NormalizedFeedItem[]> {
  const { items } = await parser.parseURL(url);
  if (items == null) {
    return [];
  }
  return items.map((entry) => mapItem(entry));
}
/**
 * Parses an already-loaded feed document and returns its items in normalized
 * form.
 *
 * @param xml Feed document as a string.
 * @returns One NormalizedFeedItem per feed item; empty when the feed has no items.
 *   Rejects with whatever error the parser's `parseString` rejects with.
 */
export async function parseFeedXml(xml: string): Promise<NormalizedFeedItem[]> {
  const { items } = await parser.parseString(xml);
  if (items == null) {
    return [];
  }
  return items.map((entry) => mapItem(entry));
}
/** A parsed feed: its feed-level title together with its normalized items. */
export interface ParsedFeed {
// Feed title, or null when none is available.
title: string | null;
// The feed's items in normalized form.
items: NormalizedFeedItem[];
}
/**
 * Loads the feed at `url` through the shared parser and returns its title
 * together with its normalized items.
 *
 * @param url Location of the feed to parse.
 * @returns The feed title (null when the parser reports none) and one
 *   NormalizedFeedItem per feed item. Rejects with whatever error the
 *   parser's `parseURL` rejects with.
 */
export async function parseFeedMeta(url: string): Promise<ParsedFeed> {
  const { title, items } = await parser.parseURL(url);
  const normalized = items == null ? [] : items.map((entry) => mapItem(entry));
  return { title: title ?? null, items: normalized };
}