feat(api): URL-Kontext auch in /decks/generate + fetchUrlContent extrahieren
- `lib/url-fetch.ts`: fetchUrlContent aus decks-from-image herausgezogen — gemeinsam genutzte Logik für mana-search + direktes HTTP-Fetch-Fallback
- `decks-generate.ts`: optionales `url`-Feld im Input-Schema; URL-Inhalt wird an den Prompt angehängt, wenn vorhanden
- `decks.ts` (web): `generateDeck()` akzeptiert jetzt `url?: string`
- UI: imageUrl wird für Text-KI + Bild-KI als Kontext genutzt

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
595f1f9cb6
commit
dc382a795d
6 changed files with 63 additions and 55 deletions
50
apps/api/src/lib/url-fetch.ts
Normal file
50
apps/api/src/lib/url-fetch.ts
Normal file
|
|
@ -0,0 +1,50 @@
|
|||
/** Upper bound, in characters, on the page text returned to callers. */
const MAX_URL_CHARS = 8_000;

/** Base URL of the mana-search extraction service; overridable through the environment. */
const MANA_SEARCH_URL = process.env.MANA_SEARCH_URL ?? 'http://localhost:3076';

/** Named HTML entities that are decoded; every other named entity is replaced by a space. */
const NAMED_ENTITIES: ReadonlyMap<string, string> = new Map([
  ['amp', '&'],
  ['lt', '<'],
  ['gt', '>'],
  ['nbsp', ' '],
  ['quot', '"'],
  ['apos', "'"],
]);

/** Returns true only for syntactically valid `http:` / `https:` URLs. */
function isHttpUrl(url: string): boolean {
  try {
    const { protocol } = new URL(url);
    return protocol === 'http:' || protocol === 'https:';
  } catch {
    return false;
  }
}

/**
 * Decodes the inner part of one entity (the text between `&` and `;`).
 * Numeric references that name a valid Unicode scalar value are decoded;
 * anything unknown or invalid becomes a single space.
 */
function decodeHtmlEntity(entity: string): string {
  const numeric = /^#(?:x([0-9a-f]+)|(\d+))$/i.exec(entity);
  if (numeric) {
    const code =
      numeric[1] !== undefined
        ? Number.parseInt(numeric[1], 16)
        : Number.parseInt(numeric[2] ?? '', 10);
    // Reject NUL, out-of-range values and lone surrogates.
    const isScalar = code > 0 && code <= 0x10ffff && (code < 0xd800 || code > 0xdfff);
    return isScalar ? String.fromCodePoint(code) : ' ';
  }
  return NAMED_ENTITIES.get(entity) ?? ' ';
}

/** Reduces an HTML document to whitespace-normalised plain text. */
function htmlToPlainText(html: string): string {
  return (
    html
      .replace(/<script[^>]*>[\s\S]*?<\/script>/gi, ' ')
      .replace(/<style[^>]*>[\s\S]*?<\/style>/gi, ' ')
      .replace(/<[^>]+>/g, ' ')
      // Single pass, so already-decoded output is never decoded a second time
      // (`&amp;lt;` stays the literal text `&lt;`, `AT&amp;T;` keeps its `&T;`).
      .replace(/&(#?\w+);/g, (_match, entity: string) => decodeHtmlEntity(entity))
      .replace(/\s+/g, ' ')
      .trim()
  );
}

/**
 * Pulls the readable text out of a mana-search `/api/v1/extract` response body.
 * The payload is untrusted JSON, so every field is narrowed before use.
 *
 * @returns The (optionally title-prefixed) text capped at MAX_URL_CHARS, or
 *          null when the payload reports no success or carries no text.
 */
function readExtractPayload(payload: unknown): string | null {
  if (typeof payload !== 'object' || payload === null) return null;
  const { success, content } = payload as { success?: unknown; content?: unknown };
  if (!success || typeof content !== 'object' || content === null) return null;

  const { title, markdown, text } = content as {
    title?: unknown;
    markdown?: unknown;
    text?: unknown;
  };
  const asText = (value: unknown): string => (typeof value === 'string' ? value : '');

  // Markdown is preferred; plain text is the fallback.
  const body = asText(markdown) || asText(text);
  if (!body.trim()) return null;

  const heading = asText(title) ? `# ${asText(title)}\n\n` : '';
  return (heading + body).slice(0, MAX_URL_CHARS);
}

/**
 * Fetches the readable text content of a web page for use as LLM context.
 *
 * Strategy:
 *  1. Ask the mana-search service to extract the page.
 *  2. If that fails or yields nothing, fetch the URL directly and strip the HTML.
 *
 * This function never throws; every failure is reported as null.
 *
 * NOTE(review): `url` comes from request input and is fetched server-side.
 * Only the scheme is checked here — private or loopback hosts are NOT blocked,
 * so SSRF protection still has to be provided by the caller or the network.
 *
 * @param url Absolute http(s) URL of the page to read.
 * @returns At most MAX_URL_CHARS characters of text, or null when the URL is
 *          not http(s), the page cannot be fetched, or it contains no text.
 */
export async function fetchUrlContent(url: string): Promise<string | null> {
  if (!isHttpUrl(url)) return null;

  // Preferred path: mana-search extraction.
  try {
    const res = await fetch(`${MANA_SEARCH_URL}/api/v1/extract`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ url, options: { includeMarkdown: true, maxLength: MAX_URL_CHARS } }),
      signal: AbortSignal.timeout(8_000),
    });
    if (res.ok) {
      const extracted = readExtractPayload(await res.json());
      if (extracted !== null) return extracted;
    }
  } catch {
    // mana-search unreachable or its body unreadable — fall through to the direct fetch.
  }

  // Fallback: direct HTTP fetch plus simple HTML stripping.
  try {
    const res = await fetch(url, {
      headers: { 'User-Agent': 'Mozilla/5.0 (compatible; mana-cards/1.0; +https://cardecky.mana.how)' },
      signal: AbortSignal.timeout(10_000),
    });
    if (!res.ok) return null;

    const text = htmlToPlainText(await res.text());
    return text ? text.slice(0, MAX_URL_CHARS) : null;
  } catch {
    return null;
  }
}
|
||||
|
|
@ -5,15 +5,13 @@ import { getDb, type CardsDb } from '../db/connection.ts';
|
|||
import { authMiddleware, type AuthVars } from '../middleware/auth.ts';
|
||||
import { chatVisionJson } from '../services/llm-client.ts';
|
||||
import { GeneratedDeckSchema, insertGeneratedDeck } from './decks-generate.ts';
|
||||
import { fetchUrlContent } from '../lib/url-fetch.ts';
|
||||
|
||||
export type FromImageDeps = { db?: CardsDb };
|
||||
|
||||
const MAX_FILES = 5;
|
||||
const MAX_BYTES_PER_IMAGE = 10 * 1024 * 1024;
|
||||
const MAX_BYTES_PER_PDF = 30 * 1024 * 1024;
|
||||
const MAX_URL_CHARS = 8_000;
|
||||
|
||||
const MANA_SEARCH_URL = process.env.MANA_SEARCH_URL ?? 'http://localhost:3076';
|
||||
|
||||
function isAllowedMime(mime: string): boolean {
|
||||
return mime.startsWith('image/') || mime === 'application/pdf';
|
||||
|
|
@ -23,54 +21,6 @@ function maxBytesFor(mime: string): number {
|
|||
return mime === 'application/pdf' ? MAX_BYTES_PER_PDF : MAX_BYTES_PER_IMAGE;
|
||||
}
|
||||
|
||||
/** Named HTML entities that are decoded; every other named entity is replaced by a space. */
const NAMED_ENTITIES: ReadonlyMap<string, string> = new Map([
  ['amp', '&'],
  ['lt', '<'],
  ['gt', '>'],
  ['nbsp', ' '],
  ['quot', '"'],
  ['apos', "'"],
]);

/** Returns true only for syntactically valid `http:` / `https:` URLs. */
function isHttpUrl(url: string): boolean {
  try {
    const { protocol } = new URL(url);
    return protocol === 'http:' || protocol === 'https:';
  } catch {
    return false;
  }
}

/**
 * Decodes the inner part of one entity (the text between `&` and `;`).
 * Numeric references that name a valid Unicode scalar value are decoded;
 * anything unknown or invalid becomes a single space.
 */
function decodeHtmlEntity(entity: string): string {
  const numeric = /^#(?:x([0-9a-f]+)|(\d+))$/i.exec(entity);
  if (numeric) {
    const code =
      numeric[1] !== undefined
        ? Number.parseInt(numeric[1], 16)
        : Number.parseInt(numeric[2] ?? '', 10);
    // Reject NUL, out-of-range values and lone surrogates.
    const isScalar = code > 0 && code <= 0x10ffff && (code < 0xd800 || code > 0xdfff);
    return isScalar ? String.fromCodePoint(code) : ' ';
  }
  return NAMED_ENTITIES.get(entity) ?? ' ';
}

/** Reduces an HTML document to whitespace-normalised plain text. */
function htmlToPlainText(html: string): string {
  return (
    html
      .replace(/<script[^>]*>[\s\S]*?<\/script>/gi, ' ')
      .replace(/<style[^>]*>[\s\S]*?<\/style>/gi, ' ')
      .replace(/<[^>]+>/g, ' ')
      // Single pass, so already-decoded output is never decoded a second time
      // (`&amp;lt;` stays the literal text `&lt;`, `AT&amp;T;` keeps its `&T;`).
      .replace(/&(#?\w+);/g, (_match, entity: string) => decodeHtmlEntity(entity))
      .replace(/\s+/g, ' ')
      .trim()
  );
}

/**
 * Pulls the readable text out of a mana-search `/api/v1/extract` response body.
 * The payload is untrusted JSON, so every field is narrowed before use.
 *
 * @returns The (optionally title-prefixed) text capped at MAX_URL_CHARS, or
 *          null when the payload reports no success or carries no text.
 */
function readExtractPayload(payload: unknown): string | null {
  if (typeof payload !== 'object' || payload === null) return null;
  const { success, content } = payload as { success?: unknown; content?: unknown };
  if (!success || typeof content !== 'object' || content === null) return null;

  const { title, markdown, text } = content as {
    title?: unknown;
    markdown?: unknown;
    text?: unknown;
  };
  const asText = (value: unknown): string => (typeof value === 'string' ? value : '');

  // Markdown is preferred; plain text is the fallback.
  const body = asText(markdown) || asText(text);
  if (!body.trim()) return null;

  const heading = asText(title) ? `# ${asText(title)}\n\n` : '';
  return (heading + body).slice(0, MAX_URL_CHARS);
}

/**
 * Fetches the readable text content of a web page for use as LLM context.
 *
 * Strategy:
 *  1. Ask the mana-search service to extract the page.
 *  2. If that fails or yields nothing, fetch the URL directly and strip the HTML.
 *
 * This function never throws; every failure is reported as null.
 *
 * NOTE(review): `url` comes from request input and is fetched server-side.
 * Only the scheme is checked here — private or loopback hosts are NOT blocked,
 * so SSRF protection still has to be provided by the caller or the network.
 *
 * @param url Absolute http(s) URL of the page to read.
 * @returns At most MAX_URL_CHARS characters of text, or null when the URL is
 *          not http(s), the page cannot be fetched, or it contains no text.
 */
async function fetchUrlContent(url: string): Promise<string | null> {
  if (!isHttpUrl(url)) return null;

  // Preferred path: mana-search extraction.
  try {
    const res = await fetch(`${MANA_SEARCH_URL}/api/v1/extract`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ url, options: { includeMarkdown: true, maxLength: MAX_URL_CHARS } }),
      signal: AbortSignal.timeout(8_000),
    });
    if (res.ok) {
      const extracted = readExtractPayload(await res.json());
      if (extracted !== null) return extracted;
    }
  } catch {
    // mana-search unreachable or its body unreadable — fall through to the direct fetch.
  }

  // Fallback: direct HTTP fetch plus simple HTML stripping.
  try {
    const res = await fetch(url, {
      headers: { 'User-Agent': 'Mozilla/5.0 (compatible; mana-cards/1.0; +https://cardecky.mana.how)' },
      signal: AbortSignal.timeout(10_000),
    });
    if (!res.ok) return null;

    const text = htmlToPlainText(await res.text());
    return text ? text.slice(0, MAX_URL_CHARS) : null;
  } catch {
    return null;
  }
}
|
||||
|
||||
const InputSchema = z.object({
|
||||
language: z.enum(['de', 'en']).optional().default('de'),
|
||||
count: z.coerce.number().int().min(1).max(40).optional().default(15),
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@ import { cards, decks, reviews } from '../db/schema/index.ts';
|
|||
import { authMiddleware, type AuthVars } from '../middleware/auth.ts';
|
||||
import { ulid } from '../lib/ulid.ts';
|
||||
import { chatJson } from '../services/llm-client.ts';
|
||||
import { fetchUrlContent } from '../lib/url-fetch.ts';
|
||||
|
||||
export type GenerateDeps = { db?: CardsDb };
|
||||
|
||||
|
|
@ -102,6 +103,7 @@ const GenerateInputSchema = z.object({
|
|||
prompt: z.string().min(3).max(500),
|
||||
language: z.enum(['de', 'en']).optional().default('de'),
|
||||
count: z.number().int().min(1).max(40).optional().default(15),
|
||||
url: z.string().url().max(2000).optional(),
|
||||
});
|
||||
|
||||
const SYSTEM_PROMPT = `Du bist ein Lerndesigner und erstellst Karteikarten-Decks für Spaced-Repetition-Lernen.
|
||||
|
|
@ -140,11 +142,17 @@ export function decksGenerateRouter(deps: GenerateDeps = {}): Hono<{ Variables:
|
|||
);
|
||||
}
|
||||
|
||||
const userPrompt = `Sprache: ${parsed.data.language}
|
||||
const urlContent = parsed.data.url ? await fetchUrlContent(parsed.data.url) : null;
|
||||
|
||||
let userPrompt = `Sprache: ${parsed.data.language}
|
||||
Erstelle ein Deck zu folgendem Thema mit etwa ${parsed.data.count} Karten:
|
||||
|
||||
${parsed.data.prompt}`;
|
||||
|
||||
if (urlContent) {
|
||||
userPrompt += `\n\nURL-Kontext (${parsed.data.url}):\n${urlContent}`;
|
||||
}
|
||||
|
||||
// LLM aufrufen + JSON parsen + Schema validieren.
|
||||
let generated: GeneratedDeck;
|
||||
try {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue