mirror of
https://github.com/Memo-2023/mana-monorepo.git
synced 2026-05-16 01:39:40 +02:00
Adds the services/news-ingester Bun service that pulls 25 public RSS/JSON feeds into news.curated_articles every 15 min, with Mozilla Readability fallback for thin RSS bodies and 30-day retention. apps/api /feed is rewritten to read from the new pool table directly instead of the sync_changes hack, with topics/lang/since/limit/offset query params. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
260 lines
6.6 KiB
TypeScript
260 lines
6.6 KiB
TypeScript
/**
 * Curated source list — single source of truth for the news ingester.
 *
 * Each source declares its parser type (`rss` or `hn`), the topic it
 * belongs to, and the language. The `slug` MUST be stable across deploys
 * because user blocklists reference it from client-side storage.
 *
 * To add a source, append an entry here and redeploy. The unified Mana app
 * mirrors a sanitized subset (slug + name + topic + language) in
 * `apps/mana/apps/web/src/lib/modules/news/sources-meta.ts` for the
 * onboarding picker — keep both files in sync when editing.
 */
|
|
|
|
/**
 * Identifies which parser a source is fed through.
 *
 * In this file every source uses `'rss'` except the Hacker News entry,
 * which uses `'hn'` and points at a JSON endpoint instead of a feed URL.
 */
export type SourceParserType =
	| 'rss'
	| 'hn';
|
|
|
|
/**
 * Editorial category a source is filed under.
 *
 * The tags are German words; the trailing comments give the English
 * meaning for readers unfamiliar with them. The string values themselves
 * are data and must not be translated.
 */
export type Topic =
	| 'tech' // technology
	| 'wissenschaft' // science
	| 'weltgeschehen' // world affairs
	| 'wirtschaft' // business / economy
	| 'kultur' // culture
	| 'gesundheit' // health
	| 'politik'; // politics
|
|
|
|
/** Describes one curated feed: where it lives, how to parse it, how to file it. */
export interface NewsSource {
	/**
	 * Identifier for the source. Must stay stable across deploys because
	 * user blocklists kept in client-side storage refer to it.
	 */
	slug: string;

	/** Human-readable name of the publication or feed. */
	name: string;

	/** Parser that should handle the payload found at `url`. */
	type: SourceParserType;

	/** Absolute URL of the feed / endpoint to fetch. */
	url: string;

	/** Editorial category the source is filed under. */
	topic: Topic;

	/** Language tag of the source; only German and English are allowed. */
	language: 'de' | 'en';
}
|
|
|
|
/**
 * Stamps a shared `topic` onto a group of entries.
 *
 * The returned objects always list their keys in the order
 * slug, name, type, url, topic, language.
 */
const inTopic = (
	topic: NewsSource['topic'],
	entries: Omit<NewsSource, 'topic'>[]
): NewsSource[] =>
	entries.map(({ slug, name, type, url, language }) => ({
		slug,
		name,
		type,
		url,
		topic,
		language,
	}));

/**
 * Every feed the ingester knows about, grouped by topic.
 *
 * Array order is: tech, wissenschaft, weltgeschehen, wirtschaft, kultur,
 * gesundheit, politik — and within a topic, the order written below.
 */
export const SOURCES: NewsSource[] = [
	// ── tech ──
	...inTopic('tech', [
		{
			slug: 'hacker-news',
			name: 'Hacker News',
			type: 'hn',
			url: 'https://hacker-news.firebaseio.com/v0/topstories.json',
			language: 'en',
		},
		{
			slug: 'arstechnica',
			name: 'Ars Technica',
			type: 'rss',
			url: 'https://feeds.arstechnica.com/arstechnica/index',
			language: 'en',
		},
		{
			slug: 'theverge',
			name: 'The Verge',
			type: 'rss',
			url: 'https://www.theverge.com/rss/index.xml',
			language: 'en',
		},
		{
			slug: 'heise',
			name: 'heise online',
			type: 'rss',
			url: 'https://www.heise.de/rss/heise-atom.xml',
			language: 'de',
		},
	]),

	// ── wissenschaft ──
	...inTopic('wissenschaft', [
		{
			slug: 'quanta-magazine',
			name: 'Quanta Magazine',
			type: 'rss',
			url: 'https://api.quantamagazine.org/feed/',
			language: 'en',
		},
		{
			slug: 'spektrum',
			name: 'Spektrum',
			type: 'rss',
			url: 'https://www.spektrum.de/alias/rss/spektrum-de-rss-feed/996406',
			language: 'de',
		},
		{
			slug: 'nature-news',
			name: 'Nature News',
			type: 'rss',
			url: 'https://www.nature.com/nature.rss',
			language: 'en',
		},
		{
			slug: 'phys-org',
			name: 'Phys.org',
			type: 'rss',
			url: 'https://phys.org/rss-feed/',
			language: 'en',
		},
	]),

	// ── weltgeschehen ──
	// As of 2026-04 Reuters and AP both block automated feed fetchers
	// (Reuters answers 406, AP refuses the connection), so Al Jazeera and
	// DW — which publish open RSS — take their place.
	...inTopic('weltgeschehen', [
		{
			slug: 'tagesschau',
			name: 'Tagesschau',
			type: 'rss',
			url: 'https://www.tagesschau.de/xml/rss2/',
			language: 'de',
		},
		{
			slug: 'bbc-world',
			name: 'BBC World',
			type: 'rss',
			url: 'https://feeds.bbci.co.uk/news/world/rss.xml',
			language: 'en',
		},
		{
			slug: 'aljazeera',
			name: 'Al Jazeera',
			type: 'rss',
			url: 'https://www.aljazeera.com/xml/rss/all.xml',
			language: 'en',
		},
		{
			slug: 'dw-top',
			name: 'Deutsche Welle',
			type: 'rss',
			url: 'https://rss.dw.com/rdf/rss-en-top',
			language: 'en',
		},
	]),

	// ── wirtschaft ──
	...inTopic('wirtschaft', [
		{
			slug: 'handelsblatt',
			name: 'Handelsblatt',
			type: 'rss',
			url: 'https://www.handelsblatt.com/contentexport/feed/schlagzeilen',
			language: 'de',
		},
		{
			slug: 'ft-world',
			name: 'Financial Times',
			type: 'rss',
			url: 'https://www.ft.com/world?format=rss',
			language: 'en',
		},
		{
			slug: 'bloomberg-markets',
			name: 'Bloomberg Markets',
			type: 'rss',
			url: 'https://feeds.bloomberg.com/markets/news.rss',
			language: 'en',
		},
		{
			slug: 'economist-finance',
			name: 'The Economist — Finance',
			type: 'rss',
			url: 'https://www.economist.com/finance-and-economics/rss.xml',
			language: 'en',
		},
	]),

	// ── kultur ──
	// Perlentaucher and ZEIT Kultur both returned 404 in testing (2026-04);
	// NPR Arts and Guardian Books are the stable replacements.
	...inTopic('kultur', [
		{
			slug: 'guardian-culture',
			name: 'The Guardian Culture',
			type: 'rss',
			url: 'https://www.theguardian.com/culture/rss',
			language: 'en',
		},
		{
			slug: 'guardian-books',
			name: 'The Guardian Books',
			type: 'rss',
			url: 'https://www.theguardian.com/books/rss',
			language: 'en',
		},
		{
			slug: 'npr-arts',
			name: 'NPR Arts',
			type: 'rss',
			url: 'https://feeds.npr.org/1008/rss.xml',
			language: 'en',
		},
	]),

	// ── gesundheit ──
	// Ärzteblatt and NIH both returned 404; STAT News still works. BBC Health
	// and ScienceDaily were added as reliable replacements.
	...inTopic('gesundheit', [
		{
			slug: 'stat-news',
			name: 'STAT News',
			type: 'rss',
			url: 'https://www.statnews.com/feed/',
			language: 'en',
		},
		{
			slug: 'bbc-health',
			name: 'BBC Health',
			type: 'rss',
			url: 'https://feeds.bbci.co.uk/news/health/rss.xml',
			language: 'en',
		},
		{
			slug: 'sciencedaily-health',
			name: 'ScienceDaily Health',
			type: 'rss',
			url: 'https://www.sciencedaily.com/rss/health_medicine.xml',
			language: 'en',
		},
	]),

	// ── politik ──
	...inTopic('politik', [
		{
			slug: 'spiegel-politik',
			name: 'Spiegel Politik',
			type: 'rss',
			url: 'https://www.spiegel.de/politik/index.rss',
			language: 'de',
		},
		{
			slug: 'politico-eu',
			name: 'Politico EU',
			type: 'rss',
			url: 'https://www.politico.eu/feed/',
			language: 'en',
		},
		{
			slug: 'atlantic-politics',
			name: 'The Atlantic — Politics',
			type: 'rss',
			url: 'https://www.theatlantic.com/feed/channel/politics/',
			language: 'en',
		},
	]),
];
|
|
|
|
/**
 * Slug → source lookup table derived from `SOURCES`.
 *
 * The table is a null-prototype object on purpose. Slugs used for lookups
 * can come from client-side storage, and on an ordinary object a key such
 * as `'constructor'` or `'toString'` would resolve to a function inherited
 * from `Object.prototype` instead of `undefined`. With no prototype, only
 * real slugs are ever found.
 *
 * If two entries in `SOURCES` share a slug, the later entry wins.
 *
 * Note: because the table has no prototype, use `Object.hasOwn(table, key)`
 * or `key in table` for membership checks, not `table.hasOwnProperty(key)`.
 */
export const SOURCE_BY_SLUG: Record<string, NewsSource> = Object.create(null) as Record<
	string,
	NewsSource
>;
for (const source of SOURCES) {
	SOURCE_BY_SLUG[source.slug] = source;
}
|