mirror of
https://github.com/Memo-2023/mana-monorepo.git
synced 2026-05-16 22:39:41 +02:00
refactor(shared-rss): extract RSS parsing + Readability into one package
news-ingester and apps/api each shipped their own copy of the rss-parser + jsdom + Readability glue. There is now a single source in packages/shared-rss. This also adds discoverFeeds (rel=alternate + common-paths probe) and validateFeed, which News Research will use. The JSDOM virtualConsole is silenced once, in the package, instead of at two parallel call sites.
- packages/shared-rss: parse, extract, discover, validate
- services/news-ingester: drop local parsers, depend on @mana/shared-rss
- apps/api: drop @mozilla/readability + jsdom direct deps, use the shared package
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
5ae7f99fe1
commit
b768a0ffce
16 changed files with 414 additions and 252 deletions
65
packages/shared-rss/src/parse.ts
Normal file
65
packages/shared-rss/src/parse.ts
Normal file
|
|
@ -0,0 +1,65 @@
|
|||
import Parser from 'rss-parser';
|
||||
import { DEFAULT_USER_AGENT, type NormalizedFeedItem } from './types';
|
||||
|
||||
/**
 * Extra per-item fields requested from rss-parser through `customFields.item`.
 *
 * For the `media:*` elements the XML attributes are read from the `$` key.
 * Both `$` and `url` are optional because the consumer in this file already
 * reads them defensively (`?.$?.url`); presumably `$` is absent when the
 * element carries no attributes — confirm against the parser output.
 *
 * `type` (MIME type) and `medium` are optional hints a feed may attach to a
 * media element; they let callers tell images apart from audio or video.
 */
type CustomItem = {
  'media:content'?: { $?: { url?: string; type?: string; medium?: string } };
  'media:thumbnail'?: { $?: { url?: string } };
  enclosure?: { url?: string; type?: string };
};
|
||||
|
||||
/**
 * Module-wide rss-parser instance shared by every parse helper in this file.
 *
 * - `customFields.item` asks the parser to also surface the media-related
 *   elements described by `CustomItem` on each item.
 * - `headers` sends the package's default User-Agent with feed requests.
 * - `timeout` is 15_000 (rss-parser documents this option in milliseconds).
 */
const parser = new Parser<unknown, CustomItem>({
  customFields: {
    item: ['media:content', 'media:thumbnail', 'enclosure'],
  },
  headers: { 'User-Agent': DEFAULT_USER_AGENT },
  timeout: 15_000,
});
|
||||
|
||||
/**
 * Maps one raw parsed feed item onto the package's `NormalizedFeedItem` shape.
 *
 * The argument is typed `unknown` and cast locally; only the fields listed in
 * the cast below are read. A `null`/`undefined` item is treated as an empty one.
 *
 * Field mapping:
 * - `url` / `title` fall back to the empty string when missing.
 * - `excerpt` and `content` both carry the `contentSnippet` value, while the
 *   item's `content` is exposed as `htmlContent`.
 * - `author` prefers `creator` over `author`.
 * - `imageUrl` is the first usable URL among `media:content`,
 *   `media:thumbnail` and `enclosure`, in that order. A `media:content` or
 *   `enclosure` that explicitly declares a non-image `medium` or MIME `type`
 *   (for example a podcast's audio enclosure) is skipped so it is not
 *   reported as an image. Elements without such hints are accepted as-is.
 * - `publishedAt` is built from `isoDate`; a missing or unparseable value
 *   yields `null` rather than an Invalid Date object.
 *
 * @param item - One entry of a parsed feed's `items` array.
 * @returns The normalized item; absent optional fields are `null`.
 */
function mapItem(item: unknown): NormalizedFeedItem {
  // Attributes of a media-bearing element; `type` and `medium` are optional hints.
  type MediaAttrs = { url?: string; type?: string; medium?: string };

  const raw = (item ?? {}) as CustomItem & {
    'media:content'?: { $?: MediaAttrs };
    enclosure?: MediaAttrs;
    link?: string;
    title?: string;
    content?: string;
    contentSnippet?: string;
    creator?: string;
    author?: string;
    isoDate?: string;
  };

  // Returns the element's URL unless it explicitly declares itself non-image.
  const imageUrlFrom = (attrs: MediaAttrs | undefined): string | undefined => {
    if (attrs == null || attrs.url == null) return undefined;
    const { medium, type } = attrs;
    if (typeof medium === 'string' && medium.length > 0 && medium.toLowerCase() !== 'image') {
      return undefined;
    }
    if (typeof type === 'string' && type.length > 0 && !type.toLowerCase().startsWith('image/')) {
      return undefined;
    }
    return attrs.url;
  };

  const imageUrl =
    imageUrlFrom(raw['media:content']?.$) ??
    raw['media:thumbnail']?.$?.url ??
    imageUrlFrom(raw.enclosure) ??
    null;

  // Guard against Invalid Date so consumers never receive a NaN timestamp.
  const parsedDate = raw.isoDate ? new Date(raw.isoDate) : null;
  const publishedAt = parsedDate !== null && !Number.isNaN(parsedDate.getTime()) ? parsedDate : null;

  return {
    url: raw.link ?? '',
    title: raw.title ?? '',
    excerpt: raw.contentSnippet ?? null,
    content: raw.contentSnippet ?? null,
    htmlContent: raw.content ?? null,
    author: raw.creator ?? raw.author ?? null,
    imageUrl,
    publishedAt,
  };
}
|
||||
|
||||
/**
 * Fetches the feed at `url` with the shared parser and returns its items
 * in normalized form.
 *
 * @param url - Address of the feed to fetch and parse.
 * @returns One normalized entry per feed item; an empty array when the
 *   parsed feed exposes no items.
 */
export async function parseFeedUrl(url: string): Promise<NormalizedFeedItem[]> {
  const { items } = await parser.parseURL(url);
  return items?.map(mapItem) ?? [];
}
|
||||
|
||||
/**
 * Parses an already-downloaded feed document with the shared parser and
 * returns its items in normalized form.
 *
 * @param xml - Raw feed markup.
 * @returns One normalized entry per feed item; an empty array when the
 *   parsed feed exposes no items.
 */
export async function parseFeedXml(xml: string): Promise<NormalizedFeedItem[]> {
  const { items } = await parser.parseString(xml);
  return items?.map(mapItem) ?? [];
}
|
||||
|
||||
/** Feed-level result returned by `parseFeedMeta`: the feed title plus its normalized items. */
export interface ParsedFeed {
  /** Normalized entries of the feed. */
  items: NormalizedFeedItem[];
  /** Title reported by the feed, or `null` when it has none. */
  title: string | null;
}
|
||||
|
||||
/**
 * Fetches the feed at `url` with the shared parser and returns its title
 * together with its normalized items.
 *
 * @param url - Address of the feed to fetch and parse.
 * @returns The feed title (`null` when the feed has none) and one normalized
 *   entry per feed item (an empty array when the feed exposes no items).
 */
export async function parseFeedMeta(url: string): Promise<ParsedFeed> {
  const { title, items } = await parser.parseURL(url);
  const normalizedItems = items?.map(mapItem) ?? [];
  return { title: title ?? null, items: normalizedItems };
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue