feat(news): backend ingester service + curated feed API

Adds the services/news-ingester Bun service that pulls 25 public RSS/JSON
feeds into news.curated_articles every 15 min, with Mozilla Readability
fallback for thin RSS bodies and 30-day retention. The apps/api /feed
endpoint is rewritten to read the new pool table directly instead of
going through the sync_changes hack, with topics/lang/since/limit/offset
query params.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Till JS 2026-04-09 15:53:26 +02:00
parent 45790ffbb8
commit 9ef97a1877
17 changed files with 1058 additions and 64 deletions

services/news-ingester/README.md

@@ -0,0 +1,100 @@
# news-ingester
Pulls public RSS/JSON feeds into `news.curated_articles` for the News Hub
module in the unified Mana app. The unified `mana-api` reads from the
same table to serve `GET /api/v1/news/feed`.
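For orientation, a feed request from the unified app looks roughly like
this. The query params are the ones named in this commit's /feed rewrite;
the response envelope is an assumption, since it is defined in apps/api
rather than here:
```ts
// Sketch of a News Hub feed request. The topics/lang/since/limit/offset
// params come from this commit's /feed rewrite; the response shape is an
// assumption (it lives in apps/api, not in this service).
const params = new URLSearchParams({
  topics: 'tech,wissenschaft', // comma-separated list (assumed format)
  lang: 'de',
  limit: '20',
  offset: '0',
});
const res = await fetch(`/api/v1/news/feed?${params}`); // relative to mana-api
const feed = await res.json(); // assumed: rows from news.curated_articles
```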
## Tech Stack
| Layer | Technology |
|-------|------------|
| Runtime | Bun |
| Framework | Hono (only for health/status/manual trigger) |
| Database | PostgreSQL + Drizzle ORM (schema `news` in `mana_platform`) |
| Parsing | `rss-parser` for RSS/Atom, `@mozilla/readability` + `jsdom` for full-text fallback |
## Port: 3066
## What it does
On startup and every `TICK_INTERVAL_MS` (default 15 min):
1. For each source in `src/sources.ts`, fetch the feed (RSS or HN JSON).
2. Normalize items and dedupe by `sha256(originalUrl)` against the
`url_hash` unique index — re-runs are safe.
3. If the feed body has fewer than 200 words, fall back to Mozilla
Readability against the original URL to get the full article text.
4. Insert into `news.curated_articles` with topic + source slug from the
source definition. Topic classification is **static** (per-source);
we do not run any content classifier.
5. Prune rows older than 30 days at the end of each tick. (Steps 2-4 are
   sketched below.)
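A condensed sketch of steps 2-4; the real implementation is `src/ingest.ts`
in this commit, and `fetchAndExtract`, `db` and `curatedArticles` are the
actual exports shown further down:
```ts
import { createHash } from 'node:crypto';

// Condensed from src/ingest.ts; fetchAndExtract, db and curatedArticles are
// the real exports from src/parsers/readability.ts and src/db/*.
const words = (t: string | null) => (t ?? '').split(/\s+/).filter(Boolean).length;

async function ingestItemSketch(item: { url: string; content: string | null }) {
  const urlHash = createHash('sha256').update(item.url).digest('hex'); // step 2
  let content = item.content;
  if (words(content) < 200) { // step 3: feed body too thin
    const extracted = await fetchAndExtract(item.url);
    if (extracted) content = extracted.content;
  }
  if (words(content) === 0) return; // nothing usable, skip
  await db
    .insert(curatedArticles) // step 4: dedupe via the url_hash unique index
    .values({ urlHash, originalUrl: item.url, content /* topic, language, ... */ })
    .onConflictDoNothing({ target: curatedArticles.urlHash });
}
```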
## API
| Method | Path | Description |
|--------|------|-------------|
| GET | `/health` | Healthcheck — returns 503 if Postgres unreachable |
| GET | `/status` | Last tick result (sources, counts, duration) |
| POST | `/ingest/run` | Trigger an ingest tick now (returns immediately) |
No auth — service is internal-only behind the docker network.
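`/status` returns the last `TickResult` from `src/ingest.ts`; an
illustrative payload:
```ts
// Illustrative /status body. The shape is TickResult / SourceResult from
// src/ingest.ts; the numbers and the error string are made up.
const exampleStatus = {
  startedAt: '2026-04-09T14:00:00.000Z',
  durationMs: 48210,
  sources: [
    { slug: 'hacker-news', fetched: 30, inserted: 5 },
    { slug: 'ft-world', fetched: 0, inserted: 0, error: 'Status code 403' },
  ],
  totalInserted: 37,
  pruned: 12,
};
```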
## Adding a source
1. Append to `SOURCES` in `src/sources.ts` with a stable `slug`, type
   (`rss` or `hn`), URL, topic, and language (see the example after this
   list).
2. Mirror the slug + name into the unified web app's onboarding picker
at `apps/mana/apps/web/src/lib/modules/news/sources-meta.ts` so users
can opt out of it. **Slugs must match** — user blocklists reference
them.
3. Restart container and `curl -X POST http://localhost:3066/ingest/run`
to populate immediately.
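A hypothetical entry, shaped by the `NewsSource` interface in
`src/sources.ts` (slug and URL are placeholders):
```ts
import type { NewsSource } from './sources'; // path as seen from src/

// Hypothetical source; slug and URL are placeholders.
const exampleSource: NewsSource = {
  slug: 'example-science-blog', // must stay stable: user blocklists reference it
  name: 'Example Science Blog',
  type: 'rss',
  url: 'https://example.org/feed.xml',
  topic: 'wissenschaft',
  language: 'en',
};
```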
## Topics
The seven shipped topics are: `tech`, `wissenschaft`, `weltgeschehen`,
`wirtschaft`, `kultur`, `gesundheit`, `politik`. Adding a new topic
means updating the `Topic` union in `src/sources.ts` AND the matching
type in the unified web app's `news/types.ts`.
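For example, adding a hypothetical `sport` topic would look like this on
the ingester side (mirror the same literal in `news/types.ts`):
```ts
// Hypothetical extension: add 'sport' here AND in the web app's news/types.ts,
// otherwise feed filters and the onboarding picker drift apart.
export type Topic =
  | 'tech'
  | 'wissenschaft'
  | 'weltgeschehen'
  | 'wirtschaft'
  | 'kultur'
  | 'gesundheit'
  | 'politik'
  | 'sport'; // new
```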
## Database
Schema: `news` in `mana_platform`. Single table `curated_articles`,
indexed on `(topic, published_at)`, `(language, published_at)`,
`source_slug`, and `ingested_at`.
`bun run db:push` pushes the schema. The schema is intentionally NOT
referenced from `apps/api`: `apps/api/src/modules/news/routes.ts`
queries the table via raw SQL to keep the API service free of a Drizzle
schema dependency on this service.
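A minimal sketch of the kind of raw query the API side can run without
importing this service's Drizzle schema (assumed shape; the real query
lives in `apps/api/src/modules/news/routes.ts`):
```ts
import postgres from 'postgres';

const sql = postgres(process.env.DATABASE_URL!);

// Hedged sketch of a /feed-style query: column names match src/db/schema.ts,
// the filters mirror the topics/lang/limit/offset params on /feed.
async function feedPage(topics: string[], lang: string, limit = 20, offset = 0) {
  return sql`
    SELECT id, title, excerpt, original_url, site_name, source_slug, topic,
           language, image_url, reading_time_minutes, published_at
    FROM news.curated_articles
    WHERE topic = ANY(${topics}) AND language = ${lang}
    ORDER BY published_at DESC NULLS LAST
    LIMIT ${limit} OFFSET ${offset}
  `;
}
```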
## Environment Variables
```env
PORT=3066
DATABASE_URL=postgresql://mana:devpassword@localhost:5432/mana_platform
TICK_INTERVAL_MS=900000 # 15 minutes
RUN_ON_STARTUP=true
```
## Local Dev
```bash
cd services/news-ingester
bun install
bun run db:push # creates news.curated_articles
bun run dev # starts on :3066, ticks immediately
curl -X POST http://localhost:3066/ingest/run
curl http://localhost:3066/status | jq
```
## Privacy / Legal
Only public RSS feeds intended for syndication are ingested. The
`User-Agent` is `Mozilla/5.0 (compatible; ManaNewsIngester/1.0;
+https://mana.how/news)` (set in `src/parsers/rss.ts` and
`src/parsers/readability.ts`) so site owners can identify and contact us.
Per-source rate limiting is implicit: one feed fetch plus up to ~30
article fetches per 15-minute tick works out to roughly 2 requests/min
per source.
User reading behavior is **not** tracked here. Personalization happens
client-side in the unified Mana app's local IndexedDB; the ingester
only knows what was published, not what was read.

services/news-ingester/Dockerfile

@@ -0,0 +1,18 @@
FROM oven/bun:1 AS production
WORKDIR /app
# Copy package files and install
COPY package.json bun.lock* ./
RUN bun install --frozen-lockfile 2>/dev/null || bun install
# Copy source
COPY src ./src
COPY tsconfig.json drizzle.config.ts ./
EXPOSE 3066
HEALTHCHECK --interval=60s --timeout=10s --start-period=30s --retries=3 \
CMD bun -e "fetch('http://localhost:3066/health').then(r => process.exit(r.ok ? 0 : 1)).catch(() => process.exit(1))"
CMD ["bun", "run", "src/index.ts"]

services/news-ingester/drizzle.config.ts

@@ -0,0 +1,11 @@
import { defineConfig } from 'drizzle-kit';
export default defineConfig({
schema: './src/db/schema.ts',
out: './drizzle',
dialect: 'postgresql',
dbCredentials: {
url: process.env.DATABASE_URL || 'postgresql://mana:devpassword@localhost:5432/mana_platform',
},
schemaFilter: ['news'],
});

services/news-ingester/package.json

@@ -0,0 +1,27 @@
{
"name": "@mana/news-ingester",
"version": "0.1.0",
"private": true,
"type": "module",
"scripts": {
"dev": "bun run --watch src/index.ts",
"start": "bun run src/index.ts",
"db:push": "drizzle-kit push",
"db:generate": "drizzle-kit generate",
"db:studio": "drizzle-kit studio"
},
"dependencies": {
"@mana/shared-hono": "workspace:*",
"@mozilla/readability": "^0.5.0",
"drizzle-orm": "^0.38.3",
"hono": "^4.7.0",
"jsdom": "^25.0.1",
"postgres": "^3.4.5",
"rss-parser": "^3.13.0"
},
"devDependencies": {
"@types/jsdom": "^21.1.7",
"drizzle-kit": "^0.30.4",
"typescript": "^5.9.3"
}
}

services/news-ingester/src/config.ts

@@ -0,0 +1,21 @@
/**
* Environment-driven config. Defaults match the local dev setup
* (`pnpm setup:env` writes the same DATABASE_URL into .env files).
*/
export interface Config {
port: number;
databaseUrl: string;
tickIntervalMs: number;
runOnStartup: boolean;
}
export function loadConfig(): Config {
return {
port: parseInt(process.env.PORT || '3066', 10),
databaseUrl:
process.env.DATABASE_URL || 'postgresql://mana:devpassword@localhost:5432/mana_platform',
tickIntervalMs: parseInt(process.env.TICK_INTERVAL_MS || '900000', 10), // 15 min
runOnStartup: (process.env.RUN_ON_STARTUP || 'true') !== 'false',
};
}

services/news-ingester/src/db/connection.ts

@@ -0,0 +1,19 @@
/**
* Single Postgres connection pool, lazily instantiated.
*/
import { drizzle } from 'drizzle-orm/postgres-js';
import postgres from 'postgres';
import * as schema from './schema';
let db: ReturnType<typeof drizzle<typeof schema>> | null = null;
export function getDb(databaseUrl: string) {
if (!db) {
const client = postgres(databaseUrl, { max: 5 });
db = drizzle(client, { schema });
}
return db;
}
export type Database = ReturnType<typeof getDb>;

services/news-ingester/src/db/schema.ts

@@ -0,0 +1,56 @@
/**
 * News schema: the public pool of curated articles.
*
* This is the *shared* article pool that the ingester writes into and the
* unified mana-api reads from for the News Hub feed. It is intentionally
* not user-scoped: the same article row is visible to every user. Per-user
* personalization (interests, blocklist, reactions) lives client-side in
* the unified Mana app's IndexedDB, not here.
*
 * Articles older than 30 days are pruned by the ingester. Saving an
 * article into a user's reading list copies the row into their local
 * encrypted `newsArticles` table; the pool is fire-and-forget.
*/
import { pgSchema, uuid, integer, text, timestamp, index } from 'drizzle-orm/pg-core';
export const newsSchema = pgSchema('news');
/**
* Pool of curated articles ingested from public RSS/JSON feeds.
*
 * `urlHash` (sha256 of originalUrl) is the dedupe key: if the same URL
 * shows up in two feeds, only the first wins. `topic` is assigned by the
 * ingester from a static source-to-topic mapping; we do not classify content.
*/
export const curatedArticles = newsSchema.table(
'curated_articles',
{
id: uuid('id').primaryKey().defaultRandom(),
urlHash: text('url_hash').notNull().unique(),
originalUrl: text('original_url').notNull(),
title: text('title').notNull(),
excerpt: text('excerpt'),
content: text('content'),
htmlContent: text('html_content'),
author: text('author'),
siteName: text('site_name').notNull(),
sourceSlug: text('source_slug').notNull(),
imageUrl: text('image_url'),
topic: text('topic').notNull(),
language: text('language').notNull(),
wordCount: integer('word_count'),
readingTimeMinutes: integer('reading_time_minutes'),
publishedAt: timestamp('published_at', { withTimezone: true }),
ingestedAt: timestamp('ingested_at', { withTimezone: true }).defaultNow().notNull(),
},
(table) => ({
topicPublishedIdx: index('curated_topic_published_idx').on(table.topic, table.publishedAt),
langPublishedIdx: index('curated_lang_published_idx').on(table.language, table.publishedAt),
sourceIdx: index('curated_source_idx').on(table.sourceSlug),
ingestedAtIdx: index('curated_ingested_at_idx').on(table.ingestedAt),
})
);
export type CuratedArticle = typeof curatedArticles.$inferSelect;
export type NewCuratedArticle = typeof curatedArticles.$inferInsert;

services/news-ingester/src/index.ts

@@ -0,0 +1,84 @@
/**
* news-ingester pulls public RSS / JSON feeds into news.curated_articles
* on a fixed interval. Exposes a tiny Hono server for health + manual
* trigger so the container can be probed and re-kicked without a restart.
*
* Why a long-running container instead of a host cron:
* - logs land in the same docker stack as everything else
* - restarts on crash via docker
* - health endpoint for the docker-compose healthcheck
* - lets us hit /ingest/run from a shell to debug new sources without
* waiting 15 minutes
*/
import { Hono } from 'hono';
import { sql } from 'drizzle-orm';
import { loadConfig } from './config';
import { getDb } from './db/connection';
import { runIngestTick, type TickResult } from './ingest';
const config = loadConfig();
const db = getDb(config.databaseUrl);
let lastTick: TickResult | null = null;
let running = false;
async function tick() {
if (running) {
console.log('[news-ingester] previous tick still running, skipping');
return;
}
running = true;
try {
lastTick = await runIngestTick(db);
} catch (err) {
console.error('[news-ingester] tick failed:', err);
} finally {
running = false;
}
}
// ─── Hono app ──────────────────────────────────────────────
const app = new Hono();
app.get('/health', async (c) => {
try {
// Cheap connectivity check — don't claim healthy if Postgres is down.
await db.execute(sql`SELECT 1`);
} catch {
return c.json({ status: 'degraded', service: 'news-ingester' }, 503);
}
return c.json({
status: 'ok',
service: 'news-ingester',
lastTickStartedAt: lastTick?.startedAt ?? null,
lastTickInserted: lastTick?.totalInserted ?? null,
running,
});
});
app.get('/status', (c) => c.json(lastTick ?? { message: 'no tick yet' }));
app.post('/ingest/run', async (c) => {
if (running) return c.json({ status: 'busy' }, 409);
// Fire-and-forget; client polls /status.
void tick();
return c.json({ status: 'started' });
});
// ─── Bootstrap ─────────────────────────────────────────────
console.log(
`[news-ingester] starting on port ${config.port}, tick every ${config.tickIntervalMs}ms`
);
if (config.runOnStartup) {
// Defer one tick so the HTTP server is up first (healthchecks pass
// while we ingest).
setTimeout(() => void tick(), 5_000);
}
setInterval(() => void tick(), config.tickIntervalMs);
export default { port: config.port, fetch: app.fetch };

services/news-ingester/src/ingest.ts

@@ -0,0 +1,179 @@
/**
 * Ingest loop: for each source, fetch the feed, normalize, dedupe by
* url-hash, optionally fall back to Readability for full text, and
* insert into `news.curated_articles`.
*
* Designed to be safe under repeated runs:
* - duplicate urls are caught by the unique index on `url_hash` and
* silently skipped via `ON CONFLICT DO NOTHING`.
* - one bad source must not poison the whole tick: every source is
* wrapped in its own try/catch.
*
* Retention: anything older than RETENTION_DAYS is pruned at the end of
* each tick. Saved articles already live in users' encrypted IndexedDB
* by then, so the pool is purely a discovery surface.
*/
import { createHash } from 'node:crypto';
import { sql } from 'drizzle-orm';
import type { Database } from './db/connection';
import { curatedArticles, type NewCuratedArticle } from './db/schema';
import { SOURCES, type NewsSource } from './sources';
import { fetchFeed, type NormalizedFeedItem } from './parsers/rss';
import { fetchHackerNews } from './parsers/hn';
import { fetchAndExtract } from './parsers/readability';
const RETENTION_DAYS = 30;
/** Min word count to consider an RSS body "full enough" to skip Readability. */
const FULL_TEXT_THRESHOLD_WORDS = 200;
function hashUrl(url: string): string {
return createHash('sha256').update(url).digest('hex');
}
function wordCountOf(text: string | null | undefined): number {
if (!text) return 0;
return text.split(/\s+/).filter(Boolean).length;
}
function readingMinutes(words: number): number {
return Math.max(1, Math.ceil(words / 200));
}
async function fetchSourceItems(source: NewsSource): Promise<NormalizedFeedItem[]> {
if (source.type === 'hn') return fetchHackerNews(source.url);
return fetchFeed(source.url);
}
/**
* Convert a normalized feed item into a `NewCuratedArticle` row,
* optionally enriching with Readability if the feed body is too thin.
*/
async function buildRow(
item: NormalizedFeedItem,
source: NewsSource
): Promise<NewCuratedArticle | null> {
if (!item.url || !item.title) return null;
let content = item.content;
let htmlContent = item.htmlContent;
let excerpt = item.excerpt;
let author = item.author;
let imageUrl = item.imageUrl;
const initialWords = wordCountOf(content);
if (initialWords < FULL_TEXT_THRESHOLD_WORDS) {
const extracted = await fetchAndExtract(item.url);
if (extracted) {
content = extracted.content;
htmlContent = extracted.htmlContent || htmlContent;
excerpt = excerpt || extracted.excerpt;
author = author || extracted.byline;
// imageUrl from RSS wins; Readability rarely has a good one.
imageUrl = imageUrl ?? null;
}
}
const words = wordCountOf(content);
if (words === 0) return null; // nothing usable, skip
return {
urlHash: hashUrl(item.url),
originalUrl: item.url,
title: item.title,
excerpt: excerpt ?? null,
content,
htmlContent: htmlContent ?? null,
author: author ?? null,
siteName: source.name,
sourceSlug: source.slug,
imageUrl,
topic: source.topic,
language: source.language,
wordCount: words,
readingTimeMinutes: readingMinutes(words),
publishedAt: item.publishedAt ?? new Date(),
};
}
interface SourceResult {
slug: string;
fetched: number;
inserted: number;
error?: string;
}
async function ingestSource(db: Database, source: NewsSource): Promise<SourceResult> {
const result: SourceResult = { slug: source.slug, fetched: 0, inserted: 0 };
let items: NormalizedFeedItem[];
try {
items = await fetchSourceItems(source);
} catch (err) {
result.error = err instanceof Error ? err.message : String(err);
return result;
}
result.fetched = items.length;
for (const item of items) {
try {
const row = await buildRow(item, source);
if (!row) continue;
const inserted = await db
.insert(curatedArticles)
.values(row)
.onConflictDoNothing({ target: curatedArticles.urlHash })
.returning({ id: curatedArticles.id });
if (inserted.length > 0) result.inserted += 1;
} catch (err) {
console.warn(
`[ingest] ${source.slug}: failed to insert "${item.title?.slice(0, 60) ?? '?'}":`,
err instanceof Error ? err.message : err
);
}
}
return result;
}
export interface TickResult {
startedAt: string;
durationMs: number;
sources: SourceResult[];
totalInserted: number;
pruned: number;
}
export async function runIngestTick(db: Database): Promise<TickResult> {
const start = Date.now();
const startedAt = new Date(start).toISOString();
const sources: SourceResult[] = [];
for (const source of SOURCES) {
const r = await ingestSource(db, source);
sources.push(r);
if (r.error) {
console.warn(`[ingest] ${r.slug}: ${r.error}`);
} else {
console.log(
`[ingest] ${r.slug}: ${r.inserted}/${r.fetched} new (topic=${SOURCES.find((s) => s.slug === r.slug)?.topic})`
);
}
}
// Retention sweep
const cutoff = new Date(Date.now() - RETENTION_DAYS * 24 * 60 * 60 * 1000);
const pruneRes = await db.execute(
sql`DELETE FROM news.curated_articles WHERE ingested_at < ${cutoff.toISOString()}`
);
// drizzle's postgres-js execute returns a result with `count` on most queries.
const pruned = (pruneRes as unknown as { count?: number }).count ?? 0;
const totalInserted = sources.reduce((acc, s) => acc + s.inserted, 0);
const durationMs = Date.now() - start;
console.log(`[ingest] tick complete: +${totalInserted} new, -${pruned} pruned, ${durationMs}ms`);
return { startedAt, durationMs, sources, totalInserted, pruned };
}

services/news-ingester/src/parsers/hn.ts

@@ -0,0 +1,57 @@
/**
 * Hacker News Firebase API parser.
*
* The HN top-stories endpoint returns ~500 IDs; we take the first 30 and
* fetch each item. External-link stories are kept (their `url` field is
* the article); Ask-HN / job posts (no `url`) are skipped because the
* "article" lives at the HN discussion page itself, which Readability
* struggles with and which isn't the user's expectation for a news feed.
*/
import type { NormalizedFeedItem } from './rss';
interface HnItem {
id: number;
type?: string;
by?: string;
time?: number;
title?: string;
url?: string;
text?: string;
}
const TOP_LIMIT = 30;
export async function fetchHackerNews(topStoriesUrl: string): Promise<NormalizedFeedItem[]> {
const idsResp = await fetch(topStoriesUrl, { signal: AbortSignal.timeout(15_000) });
if (!idsResp.ok) return [];
const ids = (await idsResp.json()) as number[];
const slice = ids.slice(0, TOP_LIMIT);
const items = await Promise.all(
slice.map(async (id) => {
try {
const r = await fetch(`https://hacker-news.firebaseio.com/v0/item/${id}.json`, {
signal: AbortSignal.timeout(10_000),
});
if (!r.ok) return null;
return (await r.json()) as HnItem;
} catch {
return null;
}
})
);
return items
.filter((it): it is HnItem => !!it && it.type === 'story' && !!it.url && !!it.title)
.map((it) => ({
url: it.url!,
title: it.title!,
excerpt: null,
content: null,
htmlContent: null,
author: it.by ?? null,
imageUrl: null,
publishedAt: it.time ? new Date(it.time * 1000) : null,
}));
}

services/news-ingester/src/parsers/readability.ts

@@ -0,0 +1,61 @@
/**
* Mozilla Readability fallback. Used when an RSS item only ships an
* excerpt, so we fetch the original page and extract the article body.
*
 * Kept dependency-local to the ingester so this service is the canonical
 * "content acquisition" boundary; apps/api never has to call out to a
 * crawler.
*/
import { Readability } from '@mozilla/readability';
import { JSDOM } from 'jsdom';
export interface ExtractedArticle {
title: string | null;
content: string;
htmlContent: string;
excerpt: string;
byline: string | null;
siteName: string | null;
wordCount: number;
readingTimeMinutes: number;
}
const USER_AGENT = 'Mozilla/5.0 (compatible; ManaNewsIngester/1.0; +https://mana.how/news)';
export async function fetchAndExtract(url: string): Promise<ExtractedArticle | null> {
let html: string;
try {
const response = await fetch(url, {
headers: { 'User-Agent': USER_AGENT },
signal: AbortSignal.timeout(15_000),
});
if (!response.ok) return null;
html = await response.text();
} catch {
return null;
}
try {
const dom = new JSDOM(html, { url });
const reader = new Readability(dom.window.document);
const article = reader.parse();
if (!article || !article.textContent) return null;
const wordCount = article.textContent.split(/\s+/).filter(Boolean).length;
const readingTimeMinutes = Math.max(1, Math.ceil(wordCount / 200));
return {
title: article.title ?? null,
content: article.textContent,
htmlContent: article.content ?? '',
excerpt: article.excerpt || article.textContent.slice(0, 240),
byline: article.byline ?? null,
siteName: article.siteName ?? null,
wordCount,
readingTimeMinutes,
};
} catch {
return null;
}
}

services/news-ingester/src/parsers/rss.ts

@@ -0,0 +1,72 @@
/**
* RSS/Atom parser wrapper. Returns a normalized shape so the ingest loop
* doesn't have to know about feed-format quirks.
*
* `rss-parser` handles both RSS 2.0 and Atom transparently. We pull out
* the bits we need (link, title, content/snippet, image, date) and let
* the ingester decide whether to call Readability for full-text.
*/
import Parser from 'rss-parser';
export interface NormalizedFeedItem {
url: string;
title: string;
excerpt: string | null;
content: string | null;
htmlContent: string | null;
author: string | null;
imageUrl: string | null;
publishedAt: Date | null;
}
type CustomItem = {
'media:content'?: { $: { url: string } };
'media:thumbnail'?: { $: { url: string } };
enclosure?: { url?: string };
};
const parser: Parser<unknown, CustomItem> = new Parser({
timeout: 15_000,
headers: {
'User-Agent': 'Mozilla/5.0 (compatible; ManaNewsIngester/1.0; +https://mana.how/news)',
},
customFields: {
item: ['media:content', 'media:thumbnail', 'enclosure'],
},
});
export async function fetchFeed(url: string): Promise<NormalizedFeedItem[]> {
const feed = await parser.parseURL(url);
return (feed.items ?? []).map((item) => {
// rss-parser puts the raw (often HTML) body in `content` and a stripped plain-text version in `contentSnippet`.
const html = (item as { content?: string }).content ?? null;
const text = (item as { contentSnippet?: string }).contentSnippet ?? null;
// Image: try a few common locations.
const mediaContent = item['media:content']?.$?.url;
const mediaThumb = item['media:thumbnail']?.$?.url;
const enclosureUrl = item.enclosure?.url;
const imageUrl = mediaContent ?? mediaThumb ?? enclosureUrl ?? null;
const link = (item as { link?: string }).link ?? '';
const title = (item as { title?: string }).title ?? '';
const author =
(item as { creator?: string; author?: string }).creator ??
(item as { author?: string }).author ??
null;
const isoDate = (item as { isoDate?: string }).isoDate ?? null;
return {
url: link,
title,
excerpt: text,
content: text,
htmlContent: html,
author,
imageUrl,
publishedAt: isoDate ? new Date(isoDate) : null,
};
});
}

services/news-ingester/src/sources.ts

@@ -0,0 +1,260 @@
/**
 * Curated source list: the single source of truth for the news ingester.
*
* Each source declares its parser type (`rss` or `hn`), the topic it
* belongs to, and the language. The `slug` MUST be stable across deploys
* because user blocklists reference it from client-side storage.
*
* Adding a source = append a row here, redeploy. The unified Mana app
* mirrors a sanitized subset (slug + name + topic + language) in
* `apps/mana/apps/web/src/lib/modules/news/sources-meta.ts` for the
 * onboarding picker; keep both files in sync when editing.
*/
export type SourceParserType = 'rss' | 'hn';
export type Topic =
| 'tech'
| 'wissenschaft'
| 'weltgeschehen'
| 'wirtschaft'
| 'kultur'
| 'gesundheit'
| 'politik';
export interface NewsSource {
slug: string;
name: string;
type: SourceParserType;
url: string;
topic: Topic;
language: 'de' | 'en';
}
export const SOURCES: NewsSource[] = [
// ─── Tech ──────────────────────────────────────────────────
{
slug: 'hacker-news',
name: 'Hacker News',
type: 'hn',
url: 'https://hacker-news.firebaseio.com/v0/topstories.json',
topic: 'tech',
language: 'en',
},
{
slug: 'arstechnica',
name: 'Ars Technica',
type: 'rss',
url: 'https://feeds.arstechnica.com/arstechnica/index',
topic: 'tech',
language: 'en',
},
{
slug: 'theverge',
name: 'The Verge',
type: 'rss',
url: 'https://www.theverge.com/rss/index.xml',
topic: 'tech',
language: 'en',
},
{
slug: 'heise',
name: 'heise online',
type: 'rss',
url: 'https://www.heise.de/rss/heise-atom.xml',
topic: 'tech',
language: 'de',
},
// ─── Wissenschaft ──────────────────────────────────────────
{
slug: 'quanta-magazine',
name: 'Quanta Magazine',
type: 'rss',
url: 'https://api.quantamagazine.org/feed/',
topic: 'wissenschaft',
language: 'en',
},
{
slug: 'spektrum',
name: 'Spektrum',
type: 'rss',
url: 'https://www.spektrum.de/alias/rss/spektrum-de-rss-feed/996406',
topic: 'wissenschaft',
language: 'de',
},
{
slug: 'nature-news',
name: 'Nature News',
type: 'rss',
url: 'https://www.nature.com/nature.rss',
topic: 'wissenschaft',
language: 'en',
},
{
slug: 'phys-org',
name: 'Phys.org',
type: 'rss',
url: 'https://phys.org/rss-feed/',
topic: 'wissenschaft',
language: 'en',
},
// ─── Weltgeschehen ─────────────────────────────────────────
// Note: Reuters and AP both block automated feed fetchers as of
// 2026-04 (Reuters returns 406, AP refuses connection). Replaced
// with Al Jazeera and DW which both publish open RSS.
{
slug: 'tagesschau',
name: 'Tagesschau',
type: 'rss',
url: 'https://www.tagesschau.de/xml/rss2/',
topic: 'weltgeschehen',
language: 'de',
},
{
slug: 'bbc-world',
name: 'BBC World',
type: 'rss',
url: 'https://feeds.bbci.co.uk/news/world/rss.xml',
topic: 'weltgeschehen',
language: 'en',
},
{
slug: 'aljazeera',
name: 'Al Jazeera',
type: 'rss',
url: 'https://www.aljazeera.com/xml/rss/all.xml',
topic: 'weltgeschehen',
language: 'en',
},
{
slug: 'dw-top',
name: 'Deutsche Welle',
type: 'rss',
url: 'https://rss.dw.com/rdf/rss-en-top',
topic: 'weltgeschehen',
language: 'en',
},
// ─── Wirtschaft ────────────────────────────────────────────
{
slug: 'handelsblatt',
name: 'Handelsblatt',
type: 'rss',
url: 'https://www.handelsblatt.com/contentexport/feed/schlagzeilen',
topic: 'wirtschaft',
language: 'de',
},
{
slug: 'ft-world',
name: 'Financial Times',
type: 'rss',
url: 'https://www.ft.com/world?format=rss',
topic: 'wirtschaft',
language: 'en',
},
{
slug: 'bloomberg-markets',
name: 'Bloomberg Markets',
type: 'rss',
url: 'https://feeds.bloomberg.com/markets/news.rss',
topic: 'wirtschaft',
language: 'en',
},
{
slug: 'economist-finance',
name: 'The Economist — Finance',
type: 'rss',
url: 'https://www.economist.com/finance-and-economics/rss.xml',
topic: 'wirtschaft',
language: 'en',
},
// ─── Kultur ────────────────────────────────────────────────
// Perlentaucher and ZEIT Kultur both 404'd in testing (2026-04);
// replaced with NPR Arts and Guardian Books which are stable.
{
slug: 'guardian-culture',
name: 'The Guardian Culture',
type: 'rss',
url: 'https://www.theguardian.com/culture/rss',
topic: 'kultur',
language: 'en',
},
{
slug: 'guardian-books',
name: 'The Guardian Books',
type: 'rss',
url: 'https://www.theguardian.com/books/rss',
topic: 'kultur',
language: 'en',
},
{
slug: 'npr-arts',
name: 'NPR Arts',
type: 'rss',
url: 'https://feeds.npr.org/1008/rss.xml',
topic: 'kultur',
language: 'en',
},
// ─── Gesundheit ────────────────────────────────────────────
// Ärzteblatt and NIH both 404'd; STAT News still works. Added
// BBC Health and ScienceDaily as reliable replacements.
{
slug: 'stat-news',
name: 'STAT News',
type: 'rss',
url: 'https://www.statnews.com/feed/',
topic: 'gesundheit',
language: 'en',
},
{
slug: 'bbc-health',
name: 'BBC Health',
type: 'rss',
url: 'https://feeds.bbci.co.uk/news/health/rss.xml',
topic: 'gesundheit',
language: 'en',
},
{
slug: 'sciencedaily-health',
name: 'ScienceDaily Health',
type: 'rss',
url: 'https://www.sciencedaily.com/rss/health_medicine.xml',
topic: 'gesundheit',
language: 'en',
},
// ─── Politik ───────────────────────────────────────────────
{
slug: 'spiegel-politik',
name: 'Spiegel Politik',
type: 'rss',
url: 'https://www.spiegel.de/politik/index.rss',
topic: 'politik',
language: 'de',
},
{
slug: 'politico-eu',
name: 'Politico EU',
type: 'rss',
url: 'https://www.politico.eu/feed/',
topic: 'politik',
language: 'en',
},
{
slug: 'atlantic-politics',
name: 'The Atlantic — Politics',
type: 'rss',
url: 'https://www.theatlantic.com/feed/channel/politics/',
topic: 'politik',
language: 'en',
},
];
/** Build a quick lookup by slug. */
export const SOURCE_BY_SLUG: Record<string, NewsSource> = Object.fromEntries(
SOURCES.map((s) => [s.slug, s])
);

services/news-ingester/tsconfig.json

@@ -0,0 +1,17 @@
{
"compilerOptions": {
"target": "ESNext",
"module": "ESNext",
"moduleResolution": "bundler",
"strict": true,
"esModuleInterop": true,
"skipLibCheck": true,
"outDir": "dist",
"rootDir": "src",
"declaration": true,
"paths": {
"@/*": ["./src/*"]
}
},
"include": ["src/**/*.ts"]
}