- `lib/url-fetch.ts`: fetchUrlContent aus decks-from-image herausgezogen — gemeinsam genutzte Logik für mana-search + direktes HTTP-Fetch-Fallback
- `decks-generate.ts`: optionales `url`-Feld im Input-Schema; URL-Inhalt wird an den Prompt angehängt, wenn vorhanden
- `decks.ts` (web): `generateDeck()` akzeptiert jetzt `url?: string`
- UI: imageUrl wird für Text-KI + Bild-KI als Kontext genutzt

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
50 lines
1.7 KiB
TypeScript
50 lines
1.7 KiB
TypeScript
/** Upper bound, in characters, on the text returned by `fetchUrlContent`. */
const MAX_URL_CHARS = 8_000;

/** Base URL of the mana-search service; the MANA_SEARCH_URL env var overrides the local default. */
const MANA_SEARCH_URL = process.env.MANA_SEARCH_URL ?? 'http://localhost:3076';

/**
 * Named HTML entities the fallback decodes. A Map (rather than an object
 * literal) keeps names such as `constructor` from resolving to inherited
 * properties.
 */
const NAMED_ENTITIES: ReadonlyMap<string, string> = new Map([
  ['amp', '&'],
  ['lt', '<'],
  ['gt', '>'],
  ['nbsp', ' '],
  ['quot', '"'],
  ['apos', "'"],
]);

/** True for any non-null object, so its properties can be probed safely. */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null;
}

/** True when `raw` parses as an absolute URL with an http: or https: scheme. */
function isHttpUrl(raw: string): boolean {
  try {
    const { protocol } = new URL(raw);
    return protocol === 'http:' || protocol === 'https:';
  } catch {
    return false;
  }
}

/**
 * Decodes HTML character references in a single pass.
 *
 * Known named entities and valid numeric references (decimal or hex) are
 * replaced by their character; every other `&...;` sequence becomes a space.
 * Because the input is scanned only once, `&amp;lt;` yields the literal text
 * `&lt;` instead of being decoded twice.
 */
function decodeHtmlEntities(text: string): string {
  return text.replace(/&(#?\w+);/g, (_match: string, body: string) => {
    if (!body.startsWith('#')) {
      return NAMED_ENTITIES.get(body) ?? ' ';
    }
    const hex = /^#x([0-9a-f]+)$/i.exec(body);
    const dec = /^#(\d+)$/.exec(body);
    const codePoint = hex?.[1] !== undefined
      ? Number.parseInt(hex[1], 16)
      : dec?.[1] !== undefined
        ? Number.parseInt(dec[1], 10)
        : Number.NaN;
    // String.fromCodePoint throws on out-of-range values; lone surrogates
    // and NUL are rejected as well so the output stays well-formed text.
    const isValid =
      Number.isInteger(codePoint) &&
      codePoint > 0 &&
      codePoint <= 0x10ffff &&
      !(codePoint >= 0xd800 && codePoint <= 0xdfff);
    return isValid ? String.fromCodePoint(codePoint) : ' ';
  });
}

/** Reduces an HTML document to whitespace-normalised plain text. */
function htmlToPlainText(html: string): string {
  // Tags are removed before entities are decoded so that a decoded `<`
  // is never mistaken for the start of a tag.
  const withoutMarkup = html
    .replace(/<script[^>]*>[\s\S]*?<\/script>/gi, ' ')
    .replace(/<style[^>]*>[\s\S]*?<\/style>/gi, ' ')
    .replace(/<[^>]+>/g, ' ');
  return decodeHtmlEntities(withoutMarkup).replace(/\s+/g, ' ').trim();
}

/**
 * Asks mana-search to extract the page. Resolves to the (optionally titled)
 * text, or `null` when the service answers without usable content. Network
 * and JSON errors propagate to the caller.
 */
async function extractViaManaSearch(url: string): Promise<string | null> {
  const res = await fetch(`${MANA_SEARCH_URL}/api/v1/extract`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ url, options: { includeMarkdown: true, maxLength: MAX_URL_CHARS } }),
    signal: AbortSignal.timeout(8_000),
  });
  if (!res.ok) return null;

  // The payload is external input: narrow it instead of asserting a shape.
  const data: unknown = await res.json();
  if (!isRecord(data) || !data.success || !isRecord(data.content)) return null;

  const { title, markdown, text } = data.content;
  // Markdown is preferred; plain text is used when markdown is missing or blank.
  const body = [markdown, text].find(
    (candidate): candidate is string => typeof candidate === 'string' && candidate.trim() !== '',
  );
  if (body === undefined) return null;

  const heading = typeof title === 'string' && title ? `# ${title}\n\n` : '';
  return (heading + body).slice(0, MAX_URL_CHARS);
}

/**
 * Fallback: downloads the URL directly and strips the HTML with regexes.
 * Resolves to `null` on a non-2xx response or when no text remains.
 */
async function fetchAndStripHtml(url: string): Promise<string | null> {
  const res = await fetch(url, {
    headers: { 'User-Agent': 'Mozilla/5.0 (compatible; mana-cards/1.0; +https://cardecky.mana.how)' },
    signal: AbortSignal.timeout(10_000),
  });
  if (!res.ok) return null;

  const text = htmlToPlainText(await res.text());
  return text ? text.slice(0, MAX_URL_CHARS) : null;
}

/**
 * Fetches the readable text content of a web page.
 *
 * The mana-search extraction endpoint is tried first; if it is unreachable,
 * fails, or returns no content, the page is fetched directly and its HTML is
 * stripped. The result is capped at `MAX_URL_CHARS` characters.
 *
 * @param url Absolute http(s) URL of the page; any other scheme is rejected
 *   before a request is made.
 * @returns The extracted text, or `null` when nothing could be retrieved.
 *   This function never rejects.
 */
export async function fetchUrlContent(url: string): Promise<string | null> {
  // The URL is caller-supplied: refuse anything that is not plain http(s).
  if (!isHttpUrl(url)) return null;

  // Prefer mana-search (the original author notes its go-readability output quality).
  try {
    const extracted = await extractViaManaSearch(url);
    if (extracted !== null) return extracted;
  } catch {
    // mana-search unreachable or returned malformed data — fall back to a direct fetch.
  }

  try {
    return await fetchAndStripHtml(url);
  } catch {
    return null;
  }
}
|