From 6d67db48d5ebf48a4e87f0a603afac4eb059fa65 Mon Sep 17 00:00:00 2001 From: Till JS Date: Wed, 6 May 2026 15:31:25 +0200 Subject: [PATCH] =?UTF-8?q?feat(forms):=20M9b=20conversation=20LLM-extract?= =?UTF-8?q?=20=E2=80=94=20free-text=20=E2=86=92=20typed=20Antwort?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Killer-Feature für den Conversation-Mode (M9): User kann auf choice/yes_no/rating-Feldern in eigenen Worten antworten ("ich nehme den zweiten Vorschlag" / "klar bin ich dabei" / "so 4 von 5"), ein LLM mappt das auf die strikte Option-ID / boolean / Integer. - apps/api/src/modules/forms/public-routes.ts: neuer POST /api/v1/forms/public/:token/conversation/extract Endpoint. Rate-limited (30/min/token + 60/min/IP — Owner-Side-Costs für haiku trotz unauthenticated-Pfad). freeText hard-cap 1000 Zeichen. Token-resolve via unlistedSnapshots, fieldId muss im publish-Schema existieren. Dispatch: - text/email/number/date: passthrough (free-text IST die Antwort) - single_choice/multi_choice/yes_no/rating: mana-llm haiku-Call mit field-spezifischem System-Prompt + JSON-only-Output, Parser validiert Option-IDs gegen das Schema (Hallucination-Schutz). Response { extracted, confidence: 'high' | 'low', alternatives? } — alternatives ist im Response-Shape vorgesehen, wird in diesem Commit aber noch nicht befüllt. confidence='low' wenn LLM unsicher → Client zeigt Warnung im Preview-Block, User kann manuell auswählen. - ConversationFormView: collapsible
"Lieber in eigenen Worten antworten?"-Block unter den quick-reply-Buttons aller choice/yes_no/rating-Felder. User tippt Free-Text → "Verstehen" ruft endpoint → Preview-Karte mit der erkannten Antwort (teal=high-confidence, amber=low-confidence) → "Übernehmen" oder "Abbrechen". commitExtract löst setAnswerAndAdvance aus, läuft über denselben Pfad wie quick-reply-Klick. Schema-Validierung im Parser: - single_choice: optionId muss in field.options sein; unbekannte IDs verwirft der Parser (Endpoint antwortet 502 LLM_BAD_SHAPE), optionId=null ergibt extracted=null mit confidence='low' - multi_choice: behält nur valide IDs (unbekannte werden verworfen), Array kann leer sein - yes_no: nur true/false/null erlaubt - rating: round(value), bounds-check 1..ratingScale (Default 5) LLM-Call: - model claude-haiku-4-5-20251001 (cheapest) - temperature 0 (deterministisch) - maxTokens 200 (JSON-Output ist klein) - Markdown-code-fence-Strip für robustes JSON-Parsing Trade-offs: - Public-Endpoint = ungated LLM-Spend für Form-Owner. Rate-Limits + freeText-Cap mitigaten Spam, aber 30 Calls/min × max. 200 Output-Tokens (plus Prompt-Input je Call) = moderate Kosten pro Form. Owner sollte das im Hinterkopf haben. - Confidence='low' eskaliert zur User-Sichtbarkeit, bricht aber nicht den Flow — User kann übernehmen oder abbrechen. Forms-Tests 61/61 unverändert (extract braucht Live-LLM für E2E, absichtlich kein vitest-Mock). svelte-check 0 errors. apps/api buildet (1772 modules). 
Co-Authored-By: Claude Opus 4.7 (1M context) --- apps/api/src/modules/forms/public-routes.ts | 268 ++++++++++++++++++ .../modules/forms/ConversationFormView.svelte | 200 +++++++++++++ 2 files changed, 468 insertions(+) diff --git a/apps/api/src/modules/forms/public-routes.ts b/apps/api/src/modules/forms/public-routes.ts index 239f7cc3f..c1d8aea31 100644 --- a/apps/api/src/modules/forms/public-routes.ts +++ b/apps/api/src/modules/forms/public-routes.ts @@ -283,4 +283,272 @@ async function sha256Hex(input: string): Promise { .join(''); } +// ─── Conversation LLM-extract (M9b) ───────────────────────── +// POST /:token/conversation/extract +// Body: { fieldId, freeText } +// Response: { extracted: AnswerValue, confidence: 'high'|'low', +// alternatives?: string[] } +// +// Maps a free-text natural-language answer ("ich nehme den zweiten") to +// a strict typed answer (option-id) for choice/yes_no fields. Calls +// mana-llm with the cheapest model (haiku) + structured-output prompt. +// +// Costs are owner-side: any visitor with the share-link can spend +// haiku tokens, so rate-limits stack (token + IP) and freeText is +// hard-capped at 1000 chars. + +const MANA_LLM_URL = process.env.MANA_LLM_URL ?? 
'http://localhost:3030'; + +routes.use( + '/:token/conversation/extract', + rateLimitMiddleware({ + max: 30, + windowMs: 60_000, + keyFn: (c) => `forms:extract:token:${c.req.param('token')}`, + }) +); + +routes.use( + '/:token/conversation/extract', + rateLimitMiddleware({ + max: 60, + windowMs: 60_000, + keyFn: (c) => { + const ip = + c.req.header('x-forwarded-for')?.split(',')[0]?.trim() || + c.req.header('x-real-ip') || + 'unknown'; + return `forms:extract:ip:${ip}`; + }, + }) +); + +interface ExtractBody { + fieldId?: string; + freeText?: string; +} + +routes.post('/:token/conversation/extract', async (c) => { + const token = c.req.param('token'); + if (!TOKEN_REGEX.test(token)) { + return errorResponse(c, 'Invalid token format', 400, { code: 'INVALID_TOKEN' }); + } + + const rows = await db + .select({ + collection: snapshots.collection, + blob: snapshots.blob, + expiresAt: snapshots.expiresAt, + revokedAt: snapshots.revokedAt, + }) + .from(snapshots) + .where(eq(snapshots.token, token)) + .limit(1); + + const row = rows[0]; + if (!row || row.collection !== 'forms') { + return errorResponse(c, 'Link nicht gefunden', 404, { code: 'NOT_FOUND' }); + } + if (row.revokedAt) { + return errorResponse(c, 'Link wurde widerrufen', 410, { code: 'REVOKED' }); + } + if (row.expiresAt && row.expiresAt.getTime() < Date.now()) { + return errorResponse(c, 'Link ist abgelaufen', 410, { code: 'EXPIRED' }); + } + + let body: ExtractBody; + try { + body = (await c.req.json()) as ExtractBody; + } catch { + return errorResponse(c, 'Body muss valid JSON sein', 400, { code: 'INVALID_JSON' }); + } + + const fieldId = typeof body.fieldId === 'string' ? body.fieldId : ''; + const freeText = typeof body.freeText === 'string' ? 
body.freeText.trim() : ''; + if (!fieldId || !freeText) { + return errorResponse(c, 'fieldId und freeText sind erforderlich', 400, { + code: 'BAD_INPUT', + }); + } + if (freeText.length > 1000) { + return errorResponse(c, 'freeText zu lang (max 1000 Zeichen)', 400, { + code: 'TOO_LONG', + }); + } + + const blob = (row.blob ?? {}) as FormSnapshotBlob; + const field = (blob.fields ?? []).find((f) => f.id === fieldId); + if (!field) { + return errorResponse(c, 'Feld nicht im veröffentlichten Schema', 400, { + code: 'UNKNOWN_FIELD', + }); + } + + if ( + field.type !== 'single_choice' && + field.type !== 'multi_choice' && + field.type !== 'yes_no' && + field.type !== 'rating' + ) { + // For text/email/number/date the free-text IS the answer — no + // LLM extraction needed. Return as-is so the client treats it + // the same as a typed widget answer. + return c.json({ extracted: freeText, confidence: 'high' as const }); + } + + const { systemPrompt, userPrompt } = buildExtractPrompt(field, freeText); + + let llmContent: string; + try { + const llmRes = await fetch(`${MANA_LLM_URL}/api/v1/chat`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + messages: [ + { role: 'system', content: systemPrompt }, + { role: 'user', content: userPrompt }, + ], + model: 'claude-haiku-4-5-20251001', + temperature: 0, + maxTokens: 200, + }), + }); + if (!llmRes.ok) { + throw new Error(`LLM error: ${llmRes.status}`); + } + const data = (await llmRes.json()) as { content: string }; + llmContent = data.content.trim(); + } catch (err) { + return errorResponse(c, `LLM-Extraktion fehlgeschlagen: ${(err as Error).message}`, 502, { + code: 'LLM_ERROR', + }); + } + + // Extract JSON from potential markdown code fences + const jsonMatch = llmContent.match(/```(?:json)?\s*([\s\S]*?)\s*```/) ?? [null, llmContent]; + let parsed: unknown; + try { + parsed = JSON.parse(jsonMatch[1] ?? 
llmContent); + } catch { + return errorResponse(c, 'LLM lieferte invalides JSON', 502, { code: 'LLM_BAD_JSON' }); + } + + const result = parseExtractResult(field, parsed); + if (!result) { + return errorResponse(c, 'LLM-Antwort passt nicht zum Feld', 502, { + code: 'LLM_BAD_SHAPE', + }); + } + return c.json(result); +}); + +function buildExtractPrompt( + field: NonNullable[number], + freeText: string +): { systemPrompt: string; userPrompt: string } { + const opts = field.options ?? []; + const optionsList = opts.map((o) => ` - id="${o.id}", label="${o.label}"`).join('\n'); + + if (field.type === 'single_choice') { + return { + systemPrompt: `Du mappst eine freitext-Antwort eines Form-Submitters auf genau eine der vorgegebenen Options-IDs. + +Feld-Frage: "${field.label ?? ''}" +Mögliche Optionen: +${optionsList} + +Antworte AUSSCHLIESSLICH mit einem JSON-Objekt der Form: +{ "optionId": "", "confidence": "high" | "low" } + +- "high" wenn die Zuordnung eindeutig ist +- "low" wenn die Antwort zu mehreren Optionen passt oder gar nicht +- Wenn keine Option passt: { "optionId": null, "confidence": "low" }`, + userPrompt: freeText, + }; + } + if (field.type === 'multi_choice') { + return { + systemPrompt: `Du mappst eine freitext-Antwort auf eine Auswahl mehrerer Options-IDs. + +Feld-Frage: "${field.label ?? ''}" +Mögliche Optionen: +${optionsList} + +Antworte AUSSCHLIESSLICH mit einem JSON-Objekt der Form: +{ "optionIds": ["", ""], "confidence": "high" | "low" } + +- "high" wenn die Zuordnung eindeutig ist +- "low" wenn unklar +- Leeres Array wenn keine Option passt`, + userPrompt: freeText, + }; + } + if (field.type === 'yes_no') { + return { + systemPrompt: `Du klassifizierst eine freitext-Antwort als Ja/Nein. + +Feld-Frage: "${field.label ?? 
''}" + +Antworte AUSSCHLIESSLICH mit einem JSON-Objekt: +{ "value": true | false | null, "confidence": "high" | "low" } + +- value=null + low wenn die Antwort weder klar Ja noch klar Nein ist`, + userPrompt: freeText, + }; + } + // rating + const max = field.config?.ratingScale ?? 5; + return { + systemPrompt: `Du extrahierst eine Bewertung im Bereich 1..${max} aus einer freitext-Antwort. + +Feld-Frage: "${field.label ?? ''}" + +Antworte AUSSCHLIESSLICH mit JSON: +{ "value": , "confidence": "high" | "low" }`, + userPrompt: freeText, + }; +} + +function parseExtractResult( + field: NonNullable[number], + raw: unknown +): { extracted: unknown; confidence: 'high' | 'low' } | null { + if (!raw || typeof raw !== 'object') return null; + const obj = raw as Record; + const confidence = obj.confidence === 'high' || obj.confidence === 'low' ? obj.confidence : 'low'; + const opts = field.options ?? []; + + if (field.type === 'single_choice') { + const id = obj.optionId; + if (id === null) return { extracted: null, confidence: 'low' }; + if (typeof id !== 'string') return null; + if (!opts.some((o) => o.id === id)) return null; + return { extracted: id, confidence }; + } + if (field.type === 'multi_choice') { + const ids = obj.optionIds; + if (!Array.isArray(ids)) return null; + const filtered = ids.filter( + (id): id is string => typeof id === 'string' && opts.some((o) => o.id === id) + ); + return { extracted: filtered, confidence }; + } + if (field.type === 'yes_no') { + const v = obj.value; + if (v === true || v === false) return { extracted: v, confidence }; + if (v === null) return { extracted: null, confidence: 'low' }; + return null; + } + if (field.type === 'rating') { + const v = obj.value; + const max = field.config?.ratingScale ?? 
5; + if (v === null) return { extracted: null, confidence: 'low' }; + if (typeof v !== 'number') return null; + const n = Math.round(v); + if (n < 1 || n > max) return null; + return { extracted: n, confidence }; + } + return null; +} + export const formsPublicRoutes = routes; diff --git a/apps/mana/apps/web/src/lib/modules/forms/ConversationFormView.svelte b/apps/mana/apps/web/src/lib/modules/forms/ConversationFormView.svelte index 65e051c70..6178fa366 100644 --- a/apps/mana/apps/web/src/lib/modules/forms/ConversationFormView.svelte +++ b/apps/mana/apps/web/src/lib/modules/forms/ConversationFormView.svelte @@ -55,6 +55,19 @@ let submitted = $state(false); let submitError = $state(null); + // M9b — free-text "Beschreib's mir" input for choice/yes_no/rating + // fields. The /conversation/extract endpoint maps it to a typed + // answer via mana-llm. Result lands in extractedDraft so the user + // can confirm or override before commit. + let freeTextDraft = $state(''); + let extracting = $state(false); + let extractedDraft = $state<{ + extracted: AnswerValue; + confidence: 'high' | 'low'; + displayText: string; + } | null>(null); + let extractError = $state(null); + // Conversation history — pairs of (questionFieldId, displayValue) // rendered as chat bubbles above the active step. 
type Bubble = { kind: 'q'; text: string } | { kind: 'a'; text: string }; @@ -99,9 +112,83 @@ pushAnswerBubble(displayText || '—'); textDraft = ''; multiDraft = []; + freeTextDraft = ''; + extractedDraft = null; + extractError = null; stepIndex += 1; } + async function runExtract() { + if (!currentField) return; + const text = freeTextDraft.trim(); + if (!text) return; + extracting = true; + extractError = null; + try { + const url = `${apiBaseUrl()}/api/v1/forms/public/${encodeURIComponent(token)}/conversation/extract`; + const res = await fetch(url, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ fieldId: currentField.id, freeText: text }), + }); + if (!res.ok) { + const txt = await res.text().catch(() => ''); + let msg: string | undefined; + try { + msg = JSON.parse(txt)?.message; + } catch { + msg = txt.slice(0, 200); + } + extractError = msg || `Extraktion fehlgeschlagen (${res.status})`; + return; + } + const json = (await res.json()) as { + extracted: AnswerValue; + confidence: 'high' | 'low'; + }; + extractedDraft = { + extracted: json.extracted, + confidence: json.confidence, + displayText: extractedDisplayText(currentField, json.extracted), + }; + } catch (err) { + extractError = + err instanceof Error ? err.message : 'Verbindung zur Extraktion fehlgeschlagen.'; + } finally { + extracting = false; + } + } + + function extractedDisplayText(field: FormField, value: AnswerValue): string { + if (value === null || value === undefined) return '(unklar)'; + if (typeof value === 'boolean') return value ? 'Ja' : 'Nein'; + if (typeof value === 'number') return String(value); + if (Array.isArray(value)) { + return value.map((id) => field.options?.find((o) => o.id === id)?.label ?? id).join(', '); + } + // single_choice option-id + return field.options?.find((o) => o.id === value)?.label ?? 
String(value); + } + + function commitExtract() { + if (!currentField || !extractedDraft) return; + setAnswerAndAdvance(extractedDraft.extracted, extractedDraft.displayText); + } + + function cancelExtract() { + freeTextDraft = ''; + extractedDraft = null; + extractError = null; + } + + const showFreeTextOption = $derived( + !!currentField && + (currentField.type === 'single_choice' || + currentField.type === 'multi_choice' || + currentField.type === 'yes_no' || + currentField.type === 'rating') + ); + function handleTextSubmit() { if (!currentField) return; const trimmed = textDraft.trim(); @@ -429,6 +516,59 @@ {/if} + {#if showFreeTextOption} +
+ Lieber in eigenen Worten antworten? + {#if extractedDraft} +
+

Verstanden als:

+

{extractedDraft.displayText}

+ {#if extractedDraft.confidence === 'low'} +

+ Klingt nicht eindeutig — bitte prüfe oder wähle direkt einen Button oben. +

+ {/if} +
+ + +
+
+ {:else} +
+ { + if (e.key === 'Enter' && !extracting && freeTextDraft.trim()) { + e.preventDefault(); + void runExtract(); + } + }} + /> + +
+ {#if extractError} +

{extractError}

+ {/if} + {/if} +
+ {/if} + {#if stepIndex > 0 && currentField.type !== 'section'} {/if} @@ -669,6 +809,66 @@ font-size: 0.8125rem; } + .conv-freetext { + margin-top: 0.25rem; + padding: 0.5rem 0; + border-top: 1px dashed #e5e7eb; + } + .conv-freetext summary { + font-size: 0.8125rem; + color: #6b7280; + cursor: pointer; + padding: 0.25rem 0; + } + .conv-freetext summary:hover { + color: #14b8a6; + } + .conv-freetext[open] summary { + margin-bottom: 0.5rem; + color: #374151; + } + + .conv-extract-preview { + display: flex; + flex-direction: column; + gap: 0.375rem; + padding: 0.625rem 0.875rem; + background: rgba(20, 184, 166, 0.1); + border: 1px solid rgba(20, 184, 166, 0.3); + border-radius: 0.5rem; + } + .conv-extract-preview.low-confidence { + background: rgba(245, 158, 11, 0.1); + border-color: rgba(245, 158, 11, 0.4); + } + .conv-extract-label { + margin: 0; + font-size: 0.6875rem; + text-transform: uppercase; + letter-spacing: 0.04em; + color: #6b7280; + } + .conv-extract-value { + margin: 0; + font-size: 0.9375rem; + font-weight: 500; + } + .conv-extract-hint { + margin: 0; + font-size: 0.75rem; + color: #92400e; + } + .conv-extract-actions { + display: flex; + gap: 0.375rem; + margin-top: 0.25rem; + } + .conv-extract-error { + margin: 0.375rem 0 0; + font-size: 0.8125rem; + color: #991b1b; + } + .conv-thanks { text-align: center; padding: 2rem 1rem;