cards/apps/api/src/lib/url-fetch.ts
Till JS dc382a795d feat(api): URL-Kontext auch in /decks/generate + fetchUrlContent extrahieren
- `lib/url-fetch.ts`: fetchUrlContent aus decks-from-image herausgezogen
  — gemeinsam genutzte Logik für mana-search + direktes HTTP-Fetch-Fallback
- `decks-generate.ts`: optionales `url`-Feld im Input-Schema;
  URL-Inhalt wird an den Prompt angehängt wenn vorhanden
- `decks.ts` (web): `generateDeck()` akzeptiert jetzt `url?: string`
- UI: imageUrl wird für Text-KI + Bild-KI als Kontext genutzt

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-10 16:39:39 +02:00

50 lines
1.7 KiB
TypeScript

/** Upper bound on the number of characters of page content handed back to callers. */
const MAX_URL_CHARS = 8_000;

/** Base URL of the mana-search extraction service; can be overridden via the environment. */
const MANA_SEARCH_URL = process.env.MANA_SEARCH_URL ?? 'http://localhost:3076';

/**
 * Content types the plain-HTTP fallback is willing to turn into text:
 * `text/*` plus XML/JSON flavoured `application/*` types (e.g. `application/xhtml+xml`).
 */
const TEXTUAL_CONTENT_TYPE = /^\s*(?:text\/|application\/(?:[\w.+-]*\+)?(?:xml|json)\b)/i;

/** Named entities that are decoded to a real character; every other named entity becomes a space. */
const NAMED_ENTITIES: ReadonlyMap<string, string> = new Map([
  ['amp', '&'],
  ['lt', '<'],
  ['gt', '>'],
  ['nbsp', ' '],
  ['quot', '"'],
  ['apos', "'"],
]);

/** Narrows an arbitrary JSON value to a non-null object so its properties can be inspected. */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null;
}

/** Returns `value` when it is a string, otherwise the empty string. */
function asString(value: unknown): string {
  return typeof value === 'string' ? value : '';
}

/**
 * Decodes a single entity reference; `body` is the part between `&` and `;`.
 * Unknown or invalid references collapse to a space.
 */
function decodeHtmlEntity(_match: string, body: string): string {
  if (!body.startsWith('#')) {
    return NAMED_ENTITIES.get(body) ?? ' ';
  }
  const isHex = body[1] === 'x' || body[1] === 'X';
  const digits = body.slice(isHex ? 2 : 1);
  const wellFormed = isHex ? /^[0-9a-f]+$/i.test(digits) : /^\d+$/.test(digits);
  if (!wellFormed) return ' ';
  const codePoint = Number.parseInt(digits, isHex ? 16 : 10);
  // Reject NUL, lone surrogates and out-of-range values: String.fromCodePoint would
  // either throw (out of range) or emit a broken string (surrogates).
  const isSurrogate = codePoint >= 0xd800 && codePoint <= 0xdfff;
  if (codePoint === 0 || codePoint > 0x10ffff || isSurrogate) return ' ';
  return String.fromCodePoint(codePoint);
}

/** Reduces an HTML document to whitespace-normalised plain text. */
function htmlToText(html: string): string {
  return (
    html
      .replace(/<script[^>]*>[\s\S]*?<\/script>/gi, ' ')
      .replace(/<style[^>]*>[\s\S]*?<\/style>/gi, ' ')
      .replace(/<[^>]+>/g, ' ')
      // Single pass on purpose: decoding `&amp;` in an earlier step would turn
      // `&amp;lt;` into `&lt;` and then wrongly into `<` (double decoding).
      .replace(/&(#?\w+);/g, decodeHtmlEntity)
      .replace(/\s+/g, ' ')
      .trim()
  );
}

/**
 * Asks mana-search to extract the readable content of `url`.
 * Returns null when the service answers but has no usable content; network errors propagate.
 */
async function extractViaManaSearch(url: string): Promise<string | null> {
  const res = await fetch(`${MANA_SEARCH_URL}/api/v1/extract`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ url, options: { includeMarkdown: true, maxLength: MAX_URL_CHARS } }),
    signal: AbortSignal.timeout(8_000),
  });
  if (!res.ok) return null;

  // The payload comes from another service, so inspect it instead of asserting its shape.
  const data: unknown = await res.json();
  if (!isRecord(data) || !data.success || !isRecord(data.content)) return null;

  const { title, markdown, text } = data.content;
  // `||` (not `??`) so that an empty markdown string falls through to the plain text.
  const body = asString(markdown) || asString(text);
  if (!body.trim()) return null;

  const heading = asString(title) ? `# ${asString(title)}\n\n` : '';
  return (heading + body).slice(0, MAX_URL_CHARS);
}

/**
 * Fetches `url` directly and strips the markup with a few regular expressions.
 * Returns null for non-2xx responses, non-textual content types and empty pages.
 */
async function fetchAndStripHtml(url: string): Promise<string | null> {
  // NOTE(review): `url` is caller-supplied and fetched from the server side without any
  // restriction on the target host (SSRF) — confirm callers validate or restrict it.
  const res = await fetch(url, {
    headers: { 'User-Agent': 'Mozilla/5.0 (compatible; mana-cards/1.0; +https://cardecky.mana.how)' },
    signal: AbortSignal.timeout(10_000),
  });
  if (!res.ok) return null;

  // Images, PDFs and other binary payloads would decode to garbage text; skip them.
  // A missing Content-Type header is tolerated and treated as text.
  const contentType = res.headers.get('content-type') ?? '';
  if (contentType !== '' && !TEXTUAL_CONTENT_TYPE.test(contentType)) {
    await res.body?.cancel();
    return null;
  }

  const text = htmlToText(await res.text());
  return text ? text.slice(0, MAX_URL_CHARS) : null;
}

/**
 * Retrieves the readable text content behind a URL.
 *
 * The mana-search extraction service is tried first; when it is unreachable or yields no
 * content, the page is fetched directly and its HTML is stripped.
 *
 * @param url - Address of the page whose content is wanted.
 * @returns At most `MAX_URL_CHARS` characters of content (prefixed with a `# title` line when
 *   mana-search supplies a title), or null when nothing usable could be retrieved.
 *   Never rejects: every failure is reported as null.
 */
export async function fetchUrlContent(url: string): Promise<string | null> {
  // Prefer mana-search (go-readability quality).
  try {
    const extracted = await extractViaManaSearch(url);
    if (extracted !== null) return extracted;
  } catch {
    // mana-search is unreachable or returned malformed data — fall back to a direct fetch.
  }

  // Fallback: direct HTTP fetch plus simple HTML stripping.
  try {
    return await fetchAndStripHtml(url);
  } catch {
    return null;
  }
}