From c3c02c6a224d59a1cc55e37c1a81f52f7e2e5c74 Mon Sep 17 00:00:00 2001 From: Till JS Date: Thu, 2 Apr 2026 11:50:23 +0200 Subject: [PATCH] feat(manacore/web): add benchmark, compare history, markdown & cache status to LLM test - Benchmark tab: run same prompt N times against loaded model, show avg/min/max/median for latency and tok/s with per-run table and bars - Compare history: save results to localStorage (max 20), restore or delete previous comparisons via History panel - Markdown rendering: use marked for assistant responses in chat tab and compare outputs with proper code blocks, lists, headings - Model cache status: check browser Cache API via WebLLM hasModelInCache, show cached/uncached per model with size badges - Add marked dependency Co-Authored-By: Claude Opus 4.6 (1M context) --- apps/manacore/apps/web/package.json | 17 +- .../src/routes/(app)/llm-test/+page.svelte | 620 +++++++++++++++--- 2 files changed, 523 insertions(+), 114 deletions(-) diff --git a/apps/manacore/apps/web/package.json b/apps/manacore/apps/web/package.json index 2ac5b3f79..5c6928b96 100644 --- a/apps/manacore/apps/web/package.json +++ b/apps/manacore/apps/web/package.json @@ -42,9 +42,13 @@ "vitest": "^4.0.14" }, "dependencies": { + "@calc/shared": "workspace:*", + "@clock/shared": "workspace:*", "@manacore/credits": "workspace:^", + "@manacore/feedback": "workspace:*", + "@manacore/help": "workspace:*", + "@manacore/local-llm": "workspace:*", "@manacore/local-store": "workspace:*", - "@manacore/shared-links": "workspace:*", "@manacore/qr-export": "workspace:*", "@manacore/shared-auth": "workspace:*", "@manacore/shared-auth-stores": "workspace:*", @@ -52,30 +56,27 @@ "@manacore/shared-branding": "workspace:*", "@manacore/shared-config": "workspace:*", "@manacore/shared-error-tracking": "workspace:*", - "@manacore/feedback": "workspace:*", "@manacore/shared-i18n": "workspace:*", - "@manacore/help": "workspace:*", "@manacore/shared-icons": "workspace:*", + "@manacore/shared-links": "workspace:*", "@manacore/shared-profile-ui": "workspace:*", "@manacore/shared-stores": "workspace:*", "@manacore/shared-tags": "workspace:*", - "@manacore/subscriptions": "workspace:*", "@manacore/shared-tailwind": "workspace:*", "@manacore/shared-theme": "workspace:*", "@manacore/shared-theme-ui": "workspace:*", "@manacore/shared-types": "workspace:*", - "@manacore/shared-uload": "workspace:*", "@manacore/shared-ui": "workspace:*", + "@manacore/shared-uload": "workspace:*", "@manacore/shared-utils": "workspace:*", "@manacore/spiral-db": "workspace:*", + "@manacore/subscriptions": "workspace:*", "@manacore/wallpaper-generator": "workspace:*", - "@calc/shared": "workspace:*", - "@clock/shared": "workspace:*", "@zitare/content": "workspace:*", "date-fns": "^4.1.0", "dexie": "^4.0.11", + "marked": "^17.0.5", "svelte-dnd-action": "^0.9.68", - "leaflet": "^1.9.4", "svelte-i18n": "^4.0.0" }, "type": "module" diff --git a/apps/manacore/apps/web/src/routes/(app)/llm-test/+page.svelte b/apps/manacore/apps/web/src/routes/(app)/llm-test/+page.svelte index 6a9b0d70b..2aa7f45c7 100644 --- a/apps/manacore/apps/web/src/routes/(app)/llm-test/+page.svelte +++ b/apps/manacore/apps/web/src/routes/(app)/llm-test/+page.svelte @@ -10,13 +10,40 @@ MODELS, type ModelKey, } from '@manacore/local-llm'; - import { Robot, Trash, PaperPlaneRight } from '@manacore/shared-icons'; + import { marked } from 'marked'; + import { Robot, Trash, PaperPlaneRight, ClockCounterClockwise } from '@manacore/shared-icons'; const modelKeys = Object.keys(MODELS) as ModelKey[]; + // --- Markdown rendering --- + marked.setOptions({ breaks: true, gfm: true }); + + function renderMarkdown(text: string): string { + return marked.parse(text, { async: false }) as string; + } + + // --- Model cache status --- + let modelCacheStatus = $state>({}); + + async function checkModelCache() { + if (typeof caches === 'undefined') return; + for (const [key, config] of Object.entries(MODELS)) { + try { + const { hasModelInCache } = await import('@mlc-ai/web-llm'); + modelCacheStatus[key] = await hasModelInCache(config.modelId); + } catch { + modelCacheStatus[key] = false; + } + } + } + + if (typeof window !== 'undefined') { + checkModelCache(); + } + // --- State --- let selectedModel: ModelKey = $state('qwen-2.5-1.5b'); - let activeTab: 'chat' | 'extract' | 'classify' | 'compare' = $state('chat'); + let activeTab: 'chat' | 'extract' | 'classify' | 'compare' | 'benchmark' = $state('chat'); const supported = isLocalLlmSupported(); const status = getLocalLlmStatus(); @@ -56,6 +83,16 @@ error?: string; } + interface CompareHistoryEntry { + id: string; + timestamp: number; + prompt: string; + systemPrompt: string; + temperature: number; + maxTokens: number; + results: CompareResult[]; + } + let comparePrompt = $state(''); let compareSystemPrompt = $state(''); let compareTemperature = $state(0.7); @@ -64,6 +101,79 @@ let compareRunning = $state(false); let compareCurrentModel = $state(null); let compareStreamingContent = $state(''); + let compareHistory = $state([]); + let showHistory = $state(false); + + function loadCompareHistory() { + try { + const stored = localStorage.getItem('llm-compare-history'); + if (stored) compareHistory = JSON.parse(stored); + } catch { + /* ignore */ + } + } + + function saveCompareHistory() { + try { + localStorage.setItem('llm-compare-history', JSON.stringify(compareHistory)); + } catch { + /* ignore */ + } + } + + function deleteHistoryEntry(id: string) { + compareHistory = compareHistory.filter((e) => e.id !== id); + saveCompareHistory(); + } + + function restoreHistoryEntry(entry: CompareHistoryEntry) { + comparePrompt = entry.prompt; + compareSystemPrompt = entry.systemPrompt; + compareTemperature = entry.temperature; + compareMaxTokens = entry.maxTokens; + compareResults = entry.results; + showHistory = false; + } + + if (typeof window !== 'undefined') { + loadCompareHistory(); + } + + // Benchmark tab + interface BenchmarkRun { + iteration: number; + latencyMs: number; + tokPerSec: number; + completionTokens: number; + } + + interface BenchmarkStats { + runs: BenchmarkRun[]; + avgLatency: number; + minLatency: number; + maxLatency: number; + medianLatency: number; + avgTokPerSec: number; + minTokPerSec: number; + maxTokPerSec: number; + medianTokPerSec: number; + totalTokens: number; + } + + let benchmarkPrompt = $state(''); + let benchmarkSystemPrompt = $state(''); + let benchmarkIterations = $state(5); + let benchmarkTemperature = $state(0.7); + let benchmarkMaxTokens = $state(256); + let benchmarkRunning = $state(false); + let benchmarkCurrentRun = $state(0); + let benchmarkStats = $state(null); + + function median(arr: number[]): number { + const sorted = [...arr].sort((a, b) => a - b); + const mid = Math.floor(sorted.length / 2); + return sorted.length % 2 !== 0 ? sorted[mid] : Math.round((sorted[mid - 1] + sorted[mid]) / 2); + } // --- Derived --- let isReady = $derived(status.current.state === 'ready'); @@ -95,7 +205,6 @@ let modelInfo = $derived(MODELS[selectedModel]); - // Auto-scroll chat to bottom on new messages/streaming $effect(() => { messages.length; streamingContent; @@ -107,6 +216,7 @@ // --- Actions --- async function handleLoad() { await loadLocalLlm(selectedModel); + checkModelCache(); } async function handleUnload() { @@ -119,7 +229,6 @@ async function handleSend() { if (!userInput.trim() || isGenerating) return; - const userMsg = userInput.trim(); messages = [...messages, { role: 'user', content: userMsg }]; userInput = ''; @@ -128,12 +237,8 @@ try { const msgs: { role: 'system' | 'user' | 'assistant'; content: string }[] = []; - if (systemPrompt.trim()) { - msgs.push({ role: 'system', content: systemPrompt.trim() }); - } - for (const m of messages) { - msgs.push({ role: m.role, content: m.content }); - } + if (systemPrompt.trim()) msgs.push({ role: 'system', content: systemPrompt.trim() }); + for (const m of messages) msgs.push({ role: m.role, content: m.content }); const result = await generate({ messages: msgs, @@ -203,18 +308,15 @@ compareStreamingContent = ''; const msgs: { role: 'system' | 'user'; content: string }[] = []; - if (compareSystemPrompt.trim()) { + if (compareSystemPrompt.trim()) msgs.push({ role: 'system', content: compareSystemPrompt.trim() }); - } msgs.push({ role: 'user', content: comparePrompt.trim() }); for (const modelKey of modelKeys) { compareCurrentModel = MODELS[modelKey].displayName; compareStreamingContent = ''; - try { await loadLocalLlm(modelKey); - const result = await generate({ messages: msgs, temperature: compareTemperature, @@ -223,12 +325,10 @@ compareStreamingContent += token; }, }); - const tokPerSec = result.latencyMs > 0 ? Math.round((result.usage.completion_tokens / result.latencyMs) * 1000) : 0; - compareResults = [ ...compareResults, { @@ -256,13 +356,81 @@ }, ]; } - await unloadLocalLlm(); } + const entry: CompareHistoryEntry = { + id: crypto.randomUUID(), + timestamp: Date.now(), + prompt: comparePrompt, + systemPrompt: compareSystemPrompt, + temperature: compareTemperature, + maxTokens: compareMaxTokens, + results: compareResults, + }; + compareHistory = [entry, ...compareHistory].slice(0, 20); + saveCompareHistory(); + compareCurrentModel = null; compareStreamingContent = ''; compareRunning = false; + checkModelCache(); + } + + async function handleBenchmark() { + if (!benchmarkPrompt.trim() || benchmarkRunning || !isReady) return; + benchmarkRunning = true; + benchmarkStats = null; + benchmarkCurrentRun = 0; + + const msgs: { role: 'system' | 'user'; content: string }[] = []; + if (benchmarkSystemPrompt.trim()) + msgs.push({ role: 'system', content: benchmarkSystemPrompt.trim() }); + msgs.push({ role: 'user', content: benchmarkPrompt.trim() }); + + const runs: BenchmarkRun[] = []; + for (let i = 0; i < benchmarkIterations; i++) { + benchmarkCurrentRun = i + 1; + try { + const result = await generate({ + messages: msgs, + temperature: benchmarkTemperature, + maxTokens: benchmarkMaxTokens, + }); + const tokPerSec = + result.latencyMs > 0 + ? Math.round((result.usage.completion_tokens / result.latencyMs) * 1000) + : 0; + runs.push({ + iteration: i + 1, + latencyMs: result.latencyMs, + tokPerSec, + completionTokens: result.usage.completion_tokens, + }); + } catch { + runs.push({ iteration: i + 1, latencyMs: 0, tokPerSec: 0, completionTokens: 0 }); + } + } + + const latencies = runs.map((r) => r.latencyMs).filter((l) => l > 0); + const speeds = runs.map((r) => r.tokPerSec).filter((s) => s > 0); + benchmarkStats = { + runs, + avgLatency: latencies.length + ? Math.round(latencies.reduce((a, b) => a + b, 0) / latencies.length) + : 0, + minLatency: latencies.length ? Math.min(...latencies) : 0, + maxLatency: latencies.length ? Math.max(...latencies) : 0, + medianLatency: latencies.length ? median(latencies) : 0, + avgTokPerSec: speeds.length + ? Math.round(speeds.reduce((a, b) => a + b, 0) / speeds.length) + : 0, + minTokPerSec: speeds.length ? Math.min(...speeds) : 0, + maxTokPerSec: speeds.length ? Math.max(...speeds) : 0, + medianTokPerSec: speeds.length ? median(speeds) : 0, + totalTokens: runs.reduce((a, r) => a + r.completionTokens, 0), + }; + benchmarkRunning = false; } function handleClear() { @@ -278,10 +446,29 @@ handleSend(); } } + + function formatTime(ts: number): string { + return new Date(ts).toLocaleString('de-DE', { + day: '2-digit', + month: '2-digit', + hour: '2-digit', + minute: '2-digit', + }); + } Local LLM Test - ManaCore + {@html ``}
@@ -296,12 +483,11 @@

WebGPU nicht verfügbar

- Dieses Feature benötigt einen Browser mit WebGPU-Support (Chrome 113+, Edge 113+). Safari - und Firefox haben experimentelle Unterstützung. + Dieses Feature benötigt einen Browser mit WebGPU-Support (Chrome 113+, Edge 113+).

{:else} - + {#if activeTab !== 'compare'}
@@ -316,35 +502,39 @@ class="rounded-lg border border-border bg-background px-3 py-1.5 text-sm text-foreground disabled:opacity-50" > {#each Object.entries(MODELS) as [key, model]} - + {/each}
-
Download: ~{modelInfo.downloadSizeMb} MB RAM: ~{modelInfo.ramUsageMb} MB + {#if modelCacheStatus[selectedModel] !== undefined} + + {modelCacheStatus[selectedModel] ? 'Im Cache' : 'Nicht im Cache'} + + {/if}
-
{#if isReady} - Entladen - {:else} - {isLoading ? 'Lädt...' : 'Modell laden'} - {/if}
-
{statusText}
- {#if progress !== null}
{/if} + + {#if Object.keys(modelCacheStatus).length > 0} +
+ {#each modelKeys as key} +
+
+ {MODELS[key].displayName} + {#if modelCacheStatus[key]} + ~{MODELS[key].downloadSizeMb} MB + {/if} +
+ {/each} +
+ {/if}
{/if}
- {#each [{ id: 'chat', label: 'Chat' }, { id: 'extract', label: 'JSON Extract' }, { id: 'classify', label: 'Classify' }, { id: 'compare', label: 'Compare' }] as tab} + {#each [{ id: 'chat', label: 'Chat' }, { id: 'extract', label: 'JSON Extract' }, { id: 'classify', label: 'Classify' }, { id: 'compare', label: 'Compare' }, { id: 'benchmark', label: 'Benchmark' }] as tab} - {tab.label} - {/each}
@@ -394,7 +601,6 @@ placeholder="System Prompt (optional)..." class="rounded-xl border border-border bg-card px-4 py-2.5 text-sm text-foreground placeholder:text-muted-foreground focus:border-primary focus:outline-none" /> -
{msg.role === 'user' ? 'Du' : modelInfo.displayName}
-
{msg.content}
+ {#if msg.role === 'assistant'} +
+ {@html renderMarkdown(msg.content)} +
+ {:else} +
{msg.content}
+ {/if}
{/each} - {#if streamingContent}
{modelInfo.displayName}
-
- {streamingContent}| +
+ {@html renderMarkdown(streamingContent)}|
{/if} {/if}
- {#if lastLatency !== null}
Latenz: {lastLatency}ms {#if lastTokens} Prompt: {lastTokens.prompt} tokens Completion: {lastTokens.completion} tokens - - Speed: {lastLatency > 0 + Speed: {lastLatency > 0 ? Math.round((lastTokens.completion / lastLatency) * 1000) - : 0} tok/s - + : 0} tok/s {/if}
{/if} -
-
@@ -614,21 +856,21 @@ onclick={handleCompare} disabled={!comparePrompt.trim() || compareRunning} class="rounded-lg bg-primary px-5 py-2 text-sm font-medium text-primary-foreground disabled:opacity-50" + >{compareRunning + ? 'Läuft...' + : `Alle ${modelKeys.length} Modelle vergleichen`} - {compareRunning ? 'Läuft...' : `Alle ${modelKeys.length} Modelle vergleichen`} -
- {#if compareRunning && compareCurrentModel}
{compareCurrentModel} - - ({compareResults.length + 1}/{modelKeys.length}) - + ({compareResults.length + 1}/{modelKeys.length})
{#if compareStreamingContent}
{/if} - {#if compareResults.length > 0} -
@@ -652,51 +892,42 @@ - {#each compareResults as r} - - {/each} + {#each compareResults as r}{/each} - {#each compareResults as r} - - {/each} + {#each compareResults as r}{/each} - {#each compareResults as r} - - {/each} + {#each compareResults as r}{/each} - {#each compareResults as r} - - {/each} + {#each compareResults as r}{/each} - {#each compareResults as r} - - {/each} + {#each compareResults as r}{/each}
Metrik - {r.displayName} - {r.displayName}
Latenz - {r.error ? 'Fehler' : `${(r.latencyMs / 1000).toFixed(1)}s`} - {r.error ? 'Fehler' : `${(r.latencyMs / 1000).toFixed(1)}s`}
Speed - {r.error ? '—' : `${r.tokPerSec} tok/s`} - {r.error ? '—' : `${r.tokPerSec} tok/s`}
Prompt Tokens - {r.error ? '—' : r.promptTokens} - {r.error ? '—' : r.promptTokens}
Completion Tokens - {r.error ? '—' : r.completionTokens} - {r.error ? '—' : r.completionTokens}
-
{#if r.error} -
- {r.error} -
+
{r.error}
{:else} -
- {r.content} +
+ {@html renderMarkdown(r.content)}
{/if}
@@ -726,5 +953,186 @@ {/if}
{/if} + + + {#if activeTab === 'benchmark'} +
+
+

+ Denselben Prompt N-mal gegen das geladene Modell laufen lassen, um Varianz zu messen. +

+ + {#if !isReady} +
+ Lade zuerst ein Modell im Model-Controls-Bereich oben. +
+ {:else} + + +
+
+ + +
+
+ + +
+
+ + +
+ +
+ {/if} +
+ + {#if benchmarkRunning} +
+
+
+ {modelInfo.displayName} — Run {benchmarkCurrentRun}/{benchmarkIterations} +
+
+
+
+
+ {/if} + + {#if benchmarkStats} +
+
+
Latenz (ms)
+
+ Durchschnitt{benchmarkStats.avgLatency} + Median{benchmarkStats.medianLatency} + Min{benchmarkStats.minLatency} + Max{benchmarkStats.maxLatency} +
+
+
+
Speed (tok/s)
+
+ Durchschnitt{benchmarkStats.avgTokPerSec} + Median{benchmarkStats.medianTokPerSec} + Max{benchmarkStats.maxTokPerSec} + Min{benchmarkStats.minTokPerSec} +
+
+
+ +
+ + + + + + + + + + + + {#each benchmarkStats.runs as run} + {@const maxLat = benchmarkStats.maxLatency || 1} + + + + + + + + {/each} + +
RunLatenztok/sTokensVerteilung
#{run.iteration}{(run.latencyMs / 1000).toFixed(2)}s{run.tokPerSec}{run.completionTokens}
+
+
+
+
+ Total: {benchmarkStats.totalTokens} tokens in {benchmarkStats.runs.length} runs ({modelInfo.displayName}) +
+ {/if} +
+ {/if} {/if}