/** Upper bound on the number of characters returned for a single URL. */
const MAX_URL_CHARS = 8_000;

/** Base URL of the mana-search extraction service; overridable through the environment. */
const MANA_SEARCH_URL = process.env.MANA_SEARCH_URL ?? 'http://localhost:3076';

/**
 * Named HTML entities that are decoded to their character.
 * A Map (rather than a plain object) so that names such as `toString`
 * cannot accidentally resolve to prototype members.
 */
const HTML_ENTITY_TEXT: ReadonlyMap<string, string> = new Map([
  ['amp', '&'],
  ['lt', '<'],
  ['gt', '>'],
  ['nbsp', ' '],
]);

/**
 * Reduces an HTML document to whitespace-normalised plain text.
 *
 * `<script>` and `<style>` elements are dropped together with their content,
 * all remaining tags are replaced by a space, and entities are resolved in a
 * single pass: `&amp;`, `&lt;`, `&gt;` and `&nbsp;` are decoded, every other
 * entity becomes a space. The single pass matters: decoding `&amp;` in a
 * separate earlier step would turn `&amp;lt;` into `<` instead of `&lt;`.
 *
 * @param html Raw HTML markup.
 * @returns The trimmed text with runs of whitespace collapsed to one space.
 */
function htmlToPlainText(html: string): string {
  return html
    .replace(/<script[^>]*>[\s\S]*?<\/script>/gi, ' ')
    .replace(/<style[^>]*>[\s\S]*?<\/style>/gi, ' ')
    .replace(/<[^>]+>/g, ' ')
    .replace(/&(#?\w+);/g, (_match, name: string) => HTML_ENTITY_TEXT.get(name) ?? ' ')
    .replace(/\s+/g, ' ')
    .trim();
}

/**
 * Fetches the readable content of a web page.
 *
 * The mana-search extraction endpoint is tried first; when it is unreachable,
 * answers with a non-OK status, or yields no text, the page is fetched
 * directly and its HTML is stripped down to plain text.
 *
 * @param url Address of the page to read.
 * @returns At most `MAX_URL_CHARS` characters of content (markdown with an
 *   optional `# title` heading from mana-search, or plain text from the
 *   fallback), or `null` when the direct fetch fails or produces no text.
 */
export async function fetchUrlContent(url: string): Promise<string | null> {
  // Prefer mana-search (go-readability quality).
  try {
    const res = await fetch(`${MANA_SEARCH_URL}/api/v1/extract`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ url, options: { includeMarkdown: true, maxLength: MAX_URL_CHARS } }),
      signal: AbortSignal.timeout(8_000),
    });

    if (res.ok) {
      // The payload is not validated; a malformed body throws below and is
      // handled by the catch, which leads to the fallback.
      const data = (await res.json()) as {
        success: boolean;
        content?: { title?: string; markdown?: string; text?: string };
      };

      if (data.success && data.content) {
        // `||` on purpose: an empty markdown string should fall through to `text`.
        const text = data.content.markdown || data.content.text || '';
        if (text.trim()) {
          const title = data.content.title ? `# ${data.content.title}\n\n` : '';
          return (title + text).slice(0, MAX_URL_CHARS);
        }
      }
    }
  } catch {
    // mana-search unreachable (or its response unusable) — fall back to a direct fetch.
  }

  // Fallback: direct HTTP fetch + simple HTML stripping.
  try {
    const res = await fetch(url, {
      headers: {
        'User-Agent': 'Mozilla/5.0 (compatible; mana-cards/1.0; +https://cardecky.mana.how)',
      },
      signal: AbortSignal.timeout(10_000),
    });
    if (!res.ok) return null;

    const text = htmlToPlainText(await res.text());
    return text ? text.slice(0, MAX_URL_CHARS) : null;
  } catch {
    return null;
  }
}