// managarten/apps/api/src/modules/news/routes.ts

/**
 * News module — Reads the curated article pool + extracts ad-hoc URLs.
 *
 * Pool population: handled by the standalone `services/news-ingester`
 * Bun service, which writes into `news.curated_articles` on a 15 min
 * loop. This route file just reads from that table.
 *
 * Saved articles (the user's personal reading list) live entirely in
 * the unified Mana app's local-first IndexedDB and sync via mana-sync;
 * this module never sees them.
 */

import { Hono } from 'hono';
import { extractFromUrl } from '@mana/shared-rss';
import { drizzle } from 'drizzle-orm/postgres-js';
import { sql } from 'drizzle-orm';
import { getConnection } from '../../lib/db';

// ─── DB Connection (reads from news.curated_articles) ──────
//
// Created once at module load: a drizzle handle wrapping whatever
// `getConnection()` returns. Handlers in this file query through it.

const db = drizzle(getConnection());

// ─── Routes ─────────────────────────────────────────────────
//
// Hono sub-app that every handler in this file is registered on; it is
// exported at the bottom of the file as `newsRoutes`.

const routes = new Hono();

// ─── Feed (reads from news.curated_articles) ───────────────
//
// Query params:
//   topics  — comma-separated topic slugs (tech,wissenschaft,…). If
//             omitted, all topics are returned.
//   lang    — 'de' | 'en' | 'all' (default 'all')
//   since   — ISO timestamp; only articles published after this
//   limit   — default 50, max 200
//   offset  — default 0
//
// Returns the full article body so the client can render the reader
// without a second round-trip. Curated articles are small (≤30 KB
// each) and the client caches them locally for offline reading.

/**
 * Parses a non-negative integer query parameter.
 *
 * @param raw      Raw query-string value (may be missing or empty).
 * @param fallback Value used when `raw` is missing, not a number,
 *                 negative, or too large to be a safe integer.
 * @returns The parsed integer, or `fallback`.
 */
function parseNonNegativeInt(raw: string | undefined, fallback: number): number {
	const parsed = Number.parseInt(raw ?? '', 10);
	return Number.isSafeInteger(parsed) && parsed >= 0 ? parsed : fallback;
}

/**
 * GET /feed — returns curated articles, newest first.
 *
 * Query params: `topics` (comma-separated slugs), `lang` ('de' | 'en',
 * anything else means all languages), `since` (timestamp; only articles
 * published after it), `limit` (default 50, capped at 200) and `offset`
 * (default 0). Unparseable or negative `limit`/`offset` fall back to
 * their defaults; an unparseable `since` is rejected with HTTP 400.
 *
 * Responds with a JSON array of article rows (camelCase keys).
 */
routes.get('/feed', async (c) => {
	const topicsParam = c.req.query('topics');
	const lang = c.req.query('lang') ?? 'all';
	const sinceParam = c.req.query('since');
	const limit = Math.min(parseNonNegativeInt(c.req.query('limit'), 50), 200);
	const offset = parseNonNegativeInt(c.req.query('offset'), 0);

	const conditions: ReturnType<typeof sql>[] = [];

	if (topicsParam) {
		const topics = topicsParam
			.split(',')
			.map((t) => t.trim())
			.filter(Boolean);
		if (topics.length > 0) {
			// Bind every topic as its own parameter and build the IN list
			// explicitly. Interpolating the JS array directly would have
			// drizzle expand it into a parenthesised parameter list rather
			// than a single array value, which `= ANY(...)` cannot consume.
			const topicList = sql.join(
				topics.map((t) => sql`${t}`),
				sql`, `
			);
			conditions.push(sql`topic IN (${topicList})`);
		}
	}
	if (lang === 'de' || lang === 'en') {
		conditions.push(sql`language = ${lang}`);
	}
	if (sinceParam) {
		// Validate up front so a malformed value is a client error (400)
		// instead of a database error (500).
		const sinceMs = Date.parse(sinceParam);
		if (Number.isNaN(sinceMs)) {
			return c.json({ error: 'Invalid "since" timestamp' }, 400);
		}
		// Bind a normalised ISO-8601 string rather than the raw input.
		conditions.push(sql`published_at > ${new Date(sinceMs).toISOString()}`);
	}

	const whereClause =
		conditions.length > 0 ? sql`WHERE ${sql.join(conditions, sql` AND `)}` : sql``;

	const result = await db.execute(sql`
		SELECT
			id,
			original_url   AS "originalUrl",
			title,
			excerpt,
			content,
			html_content   AS "htmlContent",
			author,
			site_name      AS "siteName",
			source_slug    AS "sourceSlug",
			image_url      AS "imageUrl",
			topic,
			language,
			word_count     AS "wordCount",
			reading_time_minutes AS "readingTimeMinutes",
			published_at   AS "publishedAt",
			ingested_at    AS "ingestedAt"
		FROM news.curated_articles
		${whereClause}
		ORDER BY published_at DESC NULLS LAST, ingested_at DESC
		LIMIT ${limit} OFFSET ${offset}
	`);

	return c.json(result as unknown as Record<string, unknown>[]);
});

// ─── Extract (content extraction for user-pasted URLs) ─────

/**
 * Reads and validates the `{ url }` JSON body shared by the extract
 * endpoints.
 *
 * @param req Request object exposing `json()` (pass `c.req`).
 * @returns `{ url }` with the URL string exactly as the client sent it,
 *          or `{ error }` with a message suitable for a 400 response
 *          when the body is not valid JSON, `url` is missing or not a
 *          string, or the URL is not an absolute http(s) URL.
 */
async function readUrlFromBody(req: {
	json(): Promise<unknown>;
}): Promise<{ url: string } | { error: string }> {
	let body: unknown;
	try {
		body = await req.json();
	} catch {
		// Malformed or empty body: report it as a client error instead
		// of letting the parse failure surface as a 500.
		return { error: 'Invalid JSON body' };
	}

	const url =
		typeof body === 'object' && body !== null && 'url' in body ? body.url : undefined;
	if (typeof url !== 'string' || url.trim() === '') {
		return { error: 'URL is required' };
	}

	let parsed: URL;
	try {
		parsed = new URL(url);
	} catch {
		return { error: 'URL must be an absolute http(s) URL' };
	}
	// The URL is fetched server-side, so refuse non-web schemes
	// (file:, data:, ftp:, …) outright.
	// NOTE(review): this does not stop requests to internal/private
	// hosts (SSRF); confirm whether `extractFromUrl` or the network
	// layer guards against that.
	if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') {
		return { error: 'URL must be an absolute http(s) URL' };
	}

	return { url };
}

/**
 * POST /extract/preview — extracts the article behind a user-pasted URL
 * and returns the raw extraction result.
 *
 * Responds 400 for an invalid body/URL and 502 when extraction yields
 * nothing.
 */
routes.post('/extract/preview', async (c) => {
	const input = await readUrlFromBody(c.req);
	if ('error' in input) return c.json({ error: input.error }, 400);

	const article = await extractFromUrl(input.url);
	if (!article) return c.json({ error: 'Extraction failed' }, 502);
	return c.json(article);
});

/**
 * POST /extract/save — extracts the article behind a user-pasted URL and
 * returns it shaped as a saved-article record with a fresh UUID.
 *
 * Nothing is written server-side in this handler; the record is only
 * returned to the caller. Responds 400 for an invalid body/URL and 502
 * when extraction yields nothing.
 */
routes.post('/extract/save', async (c) => {
	const input = await readUrlFromBody(c.req);
	if ('error' in input) return c.json({ error: input.error }, 400);

	const extracted = await extractFromUrl(input.url);
	if (!extracted) return c.json({ error: 'Extraction failed' }, 502);

	return c.json({
		id: crypto.randomUUID(),
		type: 'saved',
		sourceOrigin: 'user_saved',
		originalUrl: input.url,
		title: extracted.title,
		content: extracted.content,
		htmlContent: extracted.htmlContent,
		excerpt: extracted.excerpt,
		author: extracted.byline,
		siteName: extracted.siteName,
		wordCount: extracted.wordCount,
		readingTimeMinutes: extracted.readingTimeMinutes,
		isArchived: false,
	});
});

// Public entry point of this module: the router above, exposed under
// the name `newsRoutes`.
export { routes as newsRoutes };