feat(manacore/web): add benchmark, compare history, markdown & cache status to LLM test

- Benchmark tab: run same prompt N times against loaded model, show
  avg/min/max/median for latency and tok/s with per-run table and bars
- Compare history: save results to localStorage (max 20), restore or
  delete previous comparisons via History panel
- Markdown rendering: use marked for assistant responses in chat tab
  and compare outputs with proper code blocks, lists, headings
- Model cache status: check browser Cache API via WebLLM hasModelInCache,
  show cached/uncached per model with size badges
- Add marked dependency

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Till JS 2026-04-02 11:50:23 +02:00
parent 348b6ff231
commit c3c02c6a22
2 changed files with 523 additions and 114 deletions

View file

@ -42,9 +42,13 @@
"vitest": "^4.0.14"
},
"dependencies": {
"@calc/shared": "workspace:*",
"@clock/shared": "workspace:*",
"@manacore/credits": "workspace:^",
"@manacore/feedback": "workspace:*",
"@manacore/help": "workspace:*",
"@manacore/local-llm": "workspace:*",
"@manacore/local-store": "workspace:*",
"@manacore/shared-links": "workspace:*",
"@manacore/qr-export": "workspace:*",
"@manacore/shared-auth": "workspace:*",
"@manacore/shared-auth-stores": "workspace:*",
@ -52,30 +56,27 @@
"@manacore/shared-branding": "workspace:*",
"@manacore/shared-config": "workspace:*",
"@manacore/shared-error-tracking": "workspace:*",
"@manacore/feedback": "workspace:*",
"@manacore/shared-i18n": "workspace:*",
"@manacore/help": "workspace:*",
"@manacore/shared-icons": "workspace:*",
"@manacore/shared-links": "workspace:*",
"@manacore/shared-profile-ui": "workspace:*",
"@manacore/shared-stores": "workspace:*",
"@manacore/shared-tags": "workspace:*",
"@manacore/subscriptions": "workspace:*",
"@manacore/shared-tailwind": "workspace:*",
"@manacore/shared-theme": "workspace:*",
"@manacore/shared-theme-ui": "workspace:*",
"@manacore/shared-types": "workspace:*",
"@manacore/shared-uload": "workspace:*",
"@manacore/shared-ui": "workspace:*",
"@manacore/shared-uload": "workspace:*",
"@manacore/shared-utils": "workspace:*",
"@manacore/spiral-db": "workspace:*",
"@manacore/subscriptions": "workspace:*",
"@manacore/wallpaper-generator": "workspace:*",
"@calc/shared": "workspace:*",
"@clock/shared": "workspace:*",
"@zitare/content": "workspace:*",
"date-fns": "^4.1.0",
"dexie": "^4.0.11",
"marked": "^17.0.5",
"svelte-dnd-action": "^0.9.68",
"leaflet": "^1.9.4",
"svelte-i18n": "^4.0.0"
},
"type": "module"

View file

@ -10,13 +10,40 @@
MODELS,
type ModelKey,
} from '@manacore/local-llm';
import { Robot, Trash, PaperPlaneRight } from '@manacore/shared-icons';
import { marked } from 'marked';
import { Robot, Trash, PaperPlaneRight, ClockCounterClockwise } from '@manacore/shared-icons';
const modelKeys = Object.keys(MODELS) as ModelKey[];
// --- Markdown rendering ---
marked.setOptions({ breaks: true, gfm: true });
// Render markdown to an HTML string (synchronous marked mode; options set above).
// NOTE(review): the result is injected via {@html} without sanitization — marked
// does NOT sanitize, so model output containing raw HTML would execute. Acceptable
// for a local test page; confirm before exposing to untrusted content.
function renderMarkdown(text: string): string {
return marked.parse(text, { async: false }) as string;
}
// --- Model cache status ---
let modelCacheStatus = $state<Record<string, boolean>>({});
// Populate modelCacheStatus by asking WebLLM whether each model's weights are
// already present in the browser Cache API. No-op outside the browser.
async function checkModelCache() {
if (typeof caches === 'undefined') return;
// The dynamic import is loop-invariant — load the module once, not per model.
let hasModelInCache: (modelId: string) => Promise<boolean>;
try {
({ hasModelInCache } = await import('@mlc-ai/web-llm'));
} catch {
// Module unavailable: mark every model as uncached (matches previous behavior).
for (const key of Object.keys(MODELS)) modelCacheStatus[key] = false;
return;
}
for (const [key, config] of Object.entries(MODELS)) {
try {
modelCacheStatus[key] = await hasModelInCache(config.modelId);
} catch {
// Treat lookup failures as "not cached" rather than surfacing an error.
modelCacheStatus[key] = false;
}
}
}
if (typeof window !== 'undefined') {
checkModelCache();
}
// --- State ---
let selectedModel: ModelKey = $state('qwen-2.5-1.5b');
let activeTab: 'chat' | 'extract' | 'classify' | 'compare' = $state('chat');
let activeTab: 'chat' | 'extract' | 'classify' | 'compare' | 'benchmark' = $state('chat');
const supported = isLocalLlmSupported();
const status = getLocalLlmStatus();
@ -56,6 +83,16 @@
error?: string;
}
interface CompareHistoryEntry {
id: string;
timestamp: number;
prompt: string;
systemPrompt: string;
temperature: number;
maxTokens: number;
results: CompareResult[];
}
let comparePrompt = $state('');
let compareSystemPrompt = $state('');
let compareTemperature = $state(0.7);
@ -64,6 +101,79 @@
let compareRunning = $state(false);
let compareCurrentModel = $state<string | null>(null);
let compareStreamingContent = $state('');
let compareHistory = $state<CompareHistoryEntry[]>([]);
let showHistory = $state(false);
function loadCompareHistory() {
try {
const stored = localStorage.getItem('llm-compare-history');
if (stored) compareHistory = JSON.parse(stored);
} catch {
/* ignore */
}
}
// Persist the in-memory compare history to localStorage. Best-effort:
// quota or availability errors are swallowed, never surfaced to the user.
function saveCompareHistory() {
try {
const payload = JSON.stringify(compareHistory);
localStorage.setItem('llm-compare-history', payload);
} catch {
/* best-effort persistence — ignore failures */
}
}
// Remove a single saved comparison by id and persist the shrunken list.
function deleteHistoryEntry(id: string) {
const remaining = compareHistory.filter((entry) => entry.id !== id);
compareHistory = remaining;
saveCompareHistory();
}
// Copy a saved comparison back into the compare form (prompt, settings and
// previous results), then close the history panel so the restored state shows.
function restoreHistoryEntry(entry: CompareHistoryEntry) {
// Restore every persisted field of the entry into the reactive form state.
comparePrompt = entry.prompt;
compareSystemPrompt = entry.systemPrompt;
compareTemperature = entry.temperature;
compareMaxTokens = entry.maxTokens;
compareResults = entry.results;
// Hide the panel — the restored prompt/results are now visible below it.
showHistory = false;
}
if (typeof window !== 'undefined') {
loadCompareHistory();
}
// Benchmark tab
interface BenchmarkRun {
iteration: number;
latencyMs: number;
tokPerSec: number;
completionTokens: number;
}
interface BenchmarkStats {
runs: BenchmarkRun[];
avgLatency: number;
minLatency: number;
maxLatency: number;
medianLatency: number;
avgTokPerSec: number;
minTokPerSec: number;
maxTokPerSec: number;
medianTokPerSec: number;
totalTokens: number;
}
let benchmarkPrompt = $state('');
let benchmarkSystemPrompt = $state('');
let benchmarkIterations = $state(5);
let benchmarkTemperature = $state(0.7);
let benchmarkMaxTokens = $state(256);
let benchmarkRunning = $state(false);
let benchmarkCurrentRun = $state(0);
let benchmarkStats = $state<BenchmarkStats | null>(null);
// Median of a numeric array. For even length, returns the ROUNDED mean of the
// two middle values (stats are displayed as integers). Empty input returns 0
// instead of undefined/NaN, so callers no longer need to guard the empty case.
function median(arr: number[]): number {
if (arr.length === 0) return 0;
const sorted = [...arr].sort((a, b) => a - b);
const mid = Math.floor(sorted.length / 2);
return sorted.length % 2 !== 0 ? sorted[mid] : Math.round((sorted[mid - 1] + sorted[mid]) / 2);
}
// --- Derived ---
let isReady = $derived(status.current.state === 'ready');
@ -95,7 +205,6 @@
let modelInfo = $derived(MODELS[selectedModel]);
// Auto-scroll chat to bottom on new messages/streaming
$effect(() => {
messages.length;
streamingContent;
@ -107,6 +216,7 @@
// --- Actions ---
async function handleLoad() {
await loadLocalLlm(selectedModel);
checkModelCache();
}
async function handleUnload() {
@ -119,7 +229,6 @@
async function handleSend() {
if (!userInput.trim() || isGenerating) return;
const userMsg = userInput.trim();
messages = [...messages, { role: 'user', content: userMsg }];
userInput = '';
@ -128,12 +237,8 @@
try {
const msgs: { role: 'system' | 'user' | 'assistant'; content: string }[] = [];
if (systemPrompt.trim()) {
msgs.push({ role: 'system', content: systemPrompt.trim() });
}
for (const m of messages) {
msgs.push({ role: m.role, content: m.content });
}
if (systemPrompt.trim()) msgs.push({ role: 'system', content: systemPrompt.trim() });
for (const m of messages) msgs.push({ role: m.role, content: m.content });
const result = await generate({
messages: msgs,
@ -203,18 +308,15 @@
compareStreamingContent = '';
const msgs: { role: 'system' | 'user'; content: string }[] = [];
if (compareSystemPrompt.trim()) {
if (compareSystemPrompt.trim())
msgs.push({ role: 'system', content: compareSystemPrompt.trim() });
}
msgs.push({ role: 'user', content: comparePrompt.trim() });
for (const modelKey of modelKeys) {
compareCurrentModel = MODELS[modelKey].displayName;
compareStreamingContent = '';
try {
await loadLocalLlm(modelKey);
const result = await generate({
messages: msgs,
temperature: compareTemperature,
@ -223,12 +325,10 @@
compareStreamingContent += token;
},
});
const tokPerSec =
result.latencyMs > 0
? Math.round((result.usage.completion_tokens / result.latencyMs) * 1000)
: 0;
compareResults = [
...compareResults,
{
@ -256,13 +356,81 @@
},
];
}
await unloadLocalLlm();
}
const entry: CompareHistoryEntry = {
id: crypto.randomUUID(),
timestamp: Date.now(),
prompt: comparePrompt,
systemPrompt: compareSystemPrompt,
temperature: compareTemperature,
maxTokens: compareMaxTokens,
results: compareResults,
};
compareHistory = [entry, ...compareHistory].slice(0, 20);
saveCompareHistory();
compareCurrentModel = null;
compareStreamingContent = '';
compareRunning = false;
checkModelCache();
}
async function handleBenchmark() {
if (!benchmarkPrompt.trim() || benchmarkRunning || !isReady) return;
benchmarkRunning = true;
benchmarkStats = null;
benchmarkCurrentRun = 0;
const msgs: { role: 'system' | 'user'; content: string }[] = [];
if (benchmarkSystemPrompt.trim())
msgs.push({ role: 'system', content: benchmarkSystemPrompt.trim() });
msgs.push({ role: 'user', content: benchmarkPrompt.trim() });
const runs: BenchmarkRun[] = [];
for (let i = 0; i < benchmarkIterations; i++) {
benchmarkCurrentRun = i + 1;
try {
const result = await generate({
messages: msgs,
temperature: benchmarkTemperature,
maxTokens: benchmarkMaxTokens,
});
const tokPerSec =
result.latencyMs > 0
? Math.round((result.usage.completion_tokens / result.latencyMs) * 1000)
: 0;
runs.push({
iteration: i + 1,
latencyMs: result.latencyMs,
tokPerSec,
completionTokens: result.usage.completion_tokens,
});
} catch {
runs.push({ iteration: i + 1, latencyMs: 0, tokPerSec: 0, completionTokens: 0 });
}
}
const latencies = runs.map((r) => r.latencyMs).filter((l) => l > 0);
const speeds = runs.map((r) => r.tokPerSec).filter((s) => s > 0);
benchmarkStats = {
runs,
avgLatency: latencies.length
? Math.round(latencies.reduce((a, b) => a + b, 0) / latencies.length)
: 0,
minLatency: latencies.length ? Math.min(...latencies) : 0,
maxLatency: latencies.length ? Math.max(...latencies) : 0,
medianLatency: latencies.length ? median(latencies) : 0,
avgTokPerSec: speeds.length
? Math.round(speeds.reduce((a, b) => a + b, 0) / speeds.length)
: 0,
minTokPerSec: speeds.length ? Math.min(...speeds) : 0,
maxTokPerSec: speeds.length ? Math.max(...speeds) : 0,
medianTokPerSec: speeds.length ? median(speeds) : 0,
totalTokens: runs.reduce((a, r) => a + r.completionTokens, 0),
};
benchmarkRunning = false;
}
function handleClear() {
@ -278,10 +446,29 @@
handleSend();
}
}
// Format an epoch-ms timestamp as "dd.MM., HH:mm" (German locale) for the
// compare-history list. Uses the browser's local timezone.
function formatTime(ts: number): string {
const opts: Intl.DateTimeFormatOptions = {
day: '2-digit',
month: '2-digit',
hour: '2-digit',
minute: '2-digit',
};
const date = new Date(ts);
return date.toLocaleString('de-DE', opts);
}
</script>
<svelte:head>
<title>Local LLM Test - ManaCore</title>
{@html `<style>
.llm-prose { line-height: 1.6; }
.llm-prose p { margin: 0.4em 0; }
.llm-prose pre { background: var(--color-muted, #1e1e2e); border-radius: 0.5rem; padding: 0.75rem; overflow-x: auto; margin: 0.5em 0; }
.llm-prose code { font-size: 0.85em; background: var(--color-muted, #1e1e2e); padding: 0.15em 0.3em; border-radius: 0.25rem; }
.llm-prose pre code { background: none; padding: 0; }
.llm-prose ul, .llm-prose ol { padding-left: 1.5em; margin: 0.4em 0; }
.llm-prose h1, .llm-prose h2, .llm-prose h3 { margin: 0.6em 0 0.3em; font-weight: 600; }
.llm-prose blockquote { border-left: 3px solid var(--color-border, #444); padding-left: 0.75em; margin: 0.4em 0; opacity: 0.8; }
</style>`}
</svelte:head>
<div class="mx-auto max-w-4xl">
@ -296,12 +483,11 @@
<div class="rounded-xl border border-red-500/30 bg-red-500/10 p-6 text-center">
<p class="text-lg font-semibold text-red-400">WebGPU nicht verfügbar</p>
<p class="mt-2 text-sm text-muted-foreground">
Dieses Feature benötigt einen Browser mit WebGPU-Support (Chrome 113+, Edge 113+). Safari
und Firefox haben experimentelle Unterstützung.
Dieses Feature benötigt einen Browser mit WebGPU-Support (Chrome 113+, Edge 113+).
</p>
</div>
{:else}
<!-- Model Controls (hidden on Compare tab — it manages models itself) -->
<!-- Model Controls (hidden on Compare tab) -->
{#if activeTab !== 'compare'}
<div class="mb-6 rounded-xl border border-border bg-card p-4">
<div class="flex flex-wrap items-center gap-4">
@ -316,35 +502,39 @@
class="rounded-lg border border-border bg-background px-3 py-1.5 text-sm text-foreground disabled:opacity-50"
>
{#each Object.entries(MODELS) as [key, model]}
<option value={key}>{model.displayName}</option>
<option value={key}
>{model.displayName}{modelCacheStatus[key] ? ' (cached)' : ''}</option
>
{/each}
</select>
</div>
<div class="flex flex-col gap-0.5 text-xs text-muted-foreground">
<span>Download: ~{modelInfo.downloadSizeMb} MB</span>
<span>RAM: ~{modelInfo.ramUsageMb} MB</span>
{#if modelCacheStatus[selectedModel] !== undefined}
<span
class={modelCacheStatus[selectedModel] ? 'text-green-500' : 'text-muted-foreground'}
>
{modelCacheStatus[selectedModel] ? 'Im Cache' : 'Nicht im Cache'}
</span>
{/if}
</div>
<div class="flex items-center gap-2">
{#if isReady}
<button
onclick={handleUnload}
class="rounded-lg border border-border px-4 py-1.5 text-sm text-muted-foreground transition-colors hover:bg-muted"
>Entladen</button
>
Entladen
</button>
{:else}
<button
onclick={handleLoad}
disabled={isLoading}
class="rounded-lg bg-primary px-4 py-1.5 text-sm font-medium text-primary-foreground transition-opacity disabled:opacity-50"
>{isLoading ? 'Lädt...' : 'Modell laden'}</button
>
{isLoading ? 'Lädt...' : 'Modell laden'}
</button>
{/if}
</div>
<div class="ml-auto flex items-center gap-2">
<div
class="h-2.5 w-2.5 rounded-full {isReady
@ -358,7 +548,6 @@
<span class="text-xs text-muted-foreground">{statusText}</span>
</div>
</div>
{#if progress !== null}
<div class="mt-3 h-2 overflow-hidden rounded-full bg-muted">
<div
@ -367,21 +556,39 @@
></div>
</div>
{/if}
<!-- Cache overview -->
{#if Object.keys(modelCacheStatus).length > 0}
<div class="mt-3 flex flex-wrap gap-2 border-t border-border pt-3">
{#each modelKeys as key}
<div
class="flex items-center gap-1.5 rounded-md border border-border px-2.5 py-1 text-xs"
>
<div
class="h-1.5 w-1.5 rounded-full {modelCacheStatus[key]
? 'bg-green-500'
: 'bg-muted-foreground/30'}"
></div>
<span class="text-muted-foreground">{MODELS[key].displayName}</span>
{#if modelCacheStatus[key]}
<span class="text-green-500">~{MODELS[key].downloadSizeMb} MB</span>
{/if}
</div>
{/each}
</div>
{/if}
</div>
{/if}
<!-- Tabs -->
<div class="mb-4 flex gap-1 rounded-lg border border-border bg-card p-1">
{#each [{ id: 'chat', label: 'Chat' }, { id: 'extract', label: 'JSON Extract' }, { id: 'classify', label: 'Classify' }, { id: 'compare', label: 'Compare' }] as tab}
{#each [{ id: 'chat', label: 'Chat' }, { id: 'extract', label: 'JSON Extract' }, { id: 'classify', label: 'Classify' }, { id: 'compare', label: 'Compare' }, { id: 'benchmark', label: 'Benchmark' }] as tab}
<button
onclick={() => (activeTab = tab.id as typeof activeTab)}
class="flex-1 rounded-md px-3 py-1.5 text-sm font-medium transition-colors {activeTab ===
tab.id
? 'bg-primary text-primary-foreground'
: 'text-muted-foreground hover:text-foreground'}"
: 'text-muted-foreground hover:text-foreground'}">{tab.label}</button
>
{tab.label}
</button>
{/each}
</div>
@ -394,7 +601,6 @@
placeholder="System Prompt (optional)..."
class="rounded-xl border border-border bg-card px-4 py-2.5 text-sm text-foreground placeholder:text-muted-foreground focus:border-primary focus:outline-none"
/>
<div
bind:this={chatContainer}
class="max-h-[60vh] min-h-[300px] space-y-3 overflow-y-auto rounded-xl border border-border bg-background/50 p-4"
@ -420,38 +626,41 @@
<div class="mb-1 text-xs font-medium text-muted-foreground">
{msg.role === 'user' ? 'Du' : modelInfo.displayName}
</div>
<div class="whitespace-pre-wrap text-sm text-foreground">{msg.content}</div>
{#if msg.role === 'assistant'}
<div class="llm-prose text-sm text-foreground">
{@html renderMarkdown(msg.content)}
</div>
{:else}
<div class="whitespace-pre-wrap text-sm text-foreground">{msg.content}</div>
{/if}
</div>
{/each}
{#if streamingContent}
<div class="mr-8 rounded-lg border border-border bg-card p-3">
<div class="mb-1 text-xs font-medium text-muted-foreground">
{modelInfo.displayName}
</div>
<div class="whitespace-pre-wrap text-sm text-foreground">
{streamingContent}<span class="animate-pulse">|</span>
<div class="llm-prose text-sm text-foreground">
{@html renderMarkdown(streamingContent)}<span class="animate-pulse">|</span>
</div>
</div>
{/if}
{/if}
</div>
{#if lastLatency !== null}
<div class="flex gap-4 text-xs text-muted-foreground">
<span>Latenz: {lastLatency}ms</span>
{#if lastTokens}
<span>Prompt: {lastTokens.prompt} tokens</span>
<span>Completion: {lastTokens.completion} tokens</span>
<span>
Speed: {lastLatency > 0
<span
>Speed: {lastLatency > 0
? Math.round((lastTokens.completion / lastLatency) * 1000)
: 0} tok/s
</span>
: 0} tok/s</span
>
{/if}
</div>
{/if}
<div class="flex gap-3">
<textarea
bind:value={userInput}
@ -466,15 +675,13 @@
onclick={handleSend}
disabled={!isReady || !userInput.trim() || isGenerating}
class="rounded-xl bg-primary px-4 py-3 text-sm font-medium text-primary-foreground transition-opacity disabled:opacity-50"
><PaperPlaneRight size={18} /></button
>
<PaperPlaneRight size={18} />
</button>
<button
onclick={handleClear}
class="rounded-xl border border-border px-4 py-3 text-sm text-muted-foreground transition-colors hover:bg-muted"
><Trash size={18} /></button
>
<Trash size={18} />
</button>
</div>
</div>
</div>
@ -485,8 +692,7 @@
<div class="flex flex-col gap-4">
<div class="rounded-xl border border-border bg-card p-4">
<p class="mb-3 text-sm text-muted-foreground">
Extrahiere strukturiertes JSON aus beliebigem Text. Das LLM analysiert den Text und gibt
ein JSON-Objekt zurück.
Extrahiere strukturiertes JSON aus beliebigem Text.
</p>
<input
type="text"
@ -504,11 +710,9 @@
onclick={handleExtract}
disabled={!isReady || !extractText.trim() || extractLoading}
class="mt-3 rounded-lg bg-primary px-4 py-2 text-sm font-medium text-primary-foreground disabled:opacity-50"
>{extractLoading ? 'Extrahiere...' : 'JSON extrahieren'}</button
>
{extractLoading ? 'Extrahiere...' : 'JSON extrahieren'}
</button>
</div>
{#if extractResult}
<div class="rounded-xl border border-border bg-card p-4">
<div class="mb-2 text-xs font-medium text-muted-foreground">Ergebnis</div>
@ -524,8 +728,7 @@
<div class="flex flex-col gap-4">
<div class="rounded-xl border border-border bg-card p-4">
<p class="mb-3 text-sm text-muted-foreground">
Klassifiziere Text in eine von mehreren Kategorien. Das LLM wählt die passendste
Kategorie.
Klassifiziere Text in eine von mehreren Kategorien.
</p>
<input
type="text"
@ -543,11 +746,9 @@
onclick={handleClassify}
disabled={!isReady || !classifyText.trim() || classifyLoading}
class="mt-3 rounded-lg bg-primary px-4 py-2 text-sm font-medium text-primary-foreground disabled:opacity-50"
>{classifyLoading ? 'Klassifiziere...' : 'Klassifizieren'}</button
>
{classifyLoading ? 'Klassifiziere...' : 'Klassifizieren'}
</button>
</div>
{#if classifyResult}
<div class="rounded-xl border border-border bg-card p-4">
<div class="mb-2 text-xs font-medium text-muted-foreground">Ergebnis</div>
@ -563,10 +764,53 @@
{#if activeTab === 'compare'}
<div class="flex flex-col gap-4">
<div class="rounded-xl border border-border bg-card p-4">
<p class="mb-3 text-sm text-muted-foreground">
Denselben Prompt sequentiell gegen alle {modelKeys.length} Modelle testen. Jedes Modell wird
geladen, inferiert und wieder entladen — die Ergebnisse erscheinen nebeneinander.
</p>
<div class="mb-3 flex items-center justify-between">
<p class="text-sm text-muted-foreground">
Denselben Prompt gegen alle {modelKeys.length} Modelle testen.
</p>
{#if compareHistory.length > 0}
<button
onclick={() => (showHistory = !showHistory)}
class="flex items-center gap-1.5 rounded-lg border border-border px-3 py-1 text-xs text-muted-foreground transition-colors hover:bg-muted"
>
<ClockCounterClockwise size={14} />
History ({compareHistory.length})
</button>
{/if}
</div>
{#if showHistory}
<div
class="mb-4 max-h-64 overflow-y-auto rounded-lg border border-border bg-background p-3"
>
{#each compareHistory as entry}
<div
class="flex items-center justify-between border-b border-border py-2 last:border-0"
>
<div class="min-w-0 flex-1">
<div class="truncate text-sm text-foreground">{entry.prompt}</div>
<div class="flex gap-3 text-xs text-muted-foreground">
<span>{formatTime(entry.timestamp)}</span>
<span>{entry.results.length} Modelle</span>
<span>T={entry.temperature}</span>
</div>
</div>
<div class="flex gap-1.5">
<button
onclick={() => restoreHistoryEntry(entry)}
class="rounded px-2.5 py-1 text-xs text-primary hover:bg-primary/10"
>Laden</button
>
<button
onclick={() => deleteHistoryEntry(entry.id)}
class="rounded px-2.5 py-1 text-xs text-red-400 hover:bg-red-500/10"
>&times;</button
>
</div>
</div>
{/each}
</div>
{/if}
<input
type="text"
@ -574,15 +818,13 @@
placeholder="System Prompt (optional)..."
class="mb-3 w-full rounded-lg border border-border bg-background px-3 py-2 text-sm text-foreground placeholder:text-muted-foreground focus:border-primary focus:outline-none"
/>
<textarea
bind:value={comparePrompt}
placeholder="Prompt eingeben, der gegen alle Modelle getestet wird...&#10;&#10;z.B.: Erkläre Quantencomputing in 3 Sätzen."
placeholder="Prompt eingeben, der gegen alle Modelle getestet wird..."
rows={4}
disabled={compareRunning}
class="w-full resize-none rounded-lg border border-border bg-background px-3 py-2 text-sm text-foreground placeholder:text-muted-foreground focus:border-primary focus:outline-none disabled:opacity-50"
></textarea>
<div class="mt-3 flex flex-wrap items-end gap-4">
<div class="flex flex-col gap-1">
<label for="compare-temp" class="text-xs text-muted-foreground">Temperature</label>
@ -614,21 +856,21 @@
onclick={handleCompare}
disabled={!comparePrompt.trim() || compareRunning}
class="rounded-lg bg-primary px-5 py-2 text-sm font-medium text-primary-foreground disabled:opacity-50"
>{compareRunning
? 'Läuft...'
: `Alle ${modelKeys.length} Modelle vergleichen`}</button
>
{compareRunning ? 'Läuft...' : `Alle ${modelKeys.length} Modelle vergleichen`}
</button>
</div>
</div>
<!-- Running indicator -->
{#if compareRunning && compareCurrentModel}
<div class="rounded-xl border border-yellow-500/30 bg-yellow-500/5 p-4">
<div class="mb-2 flex items-center gap-2">
<div class="h-2.5 w-2.5 animate-pulse rounded-full bg-yellow-500"></div>
<span class="text-sm font-medium text-foreground">{compareCurrentModel}</span>
<span class="text-xs text-muted-foreground">
({compareResults.length + 1}/{modelKeys.length})
</span>
<span class="text-xs text-muted-foreground"
>({compareResults.length + 1}/{modelKeys.length})</span
>
</div>
{#if compareStreamingContent}
<div
@ -642,9 +884,7 @@
</div>
{/if}
<!-- Results -->
{#if compareResults.length > 0}
<!-- Stats comparison table -->
<div class="overflow-x-auto rounded-xl border border-border">
<table class="w-full text-sm">
<thead>
@ -652,51 +892,42 @@
<th class="px-4 py-2.5 text-left text-xs font-medium text-muted-foreground"
>Metrik</th
>
{#each compareResults as r}
<th class="px-4 py-2.5 text-left text-xs font-medium text-muted-foreground">
{r.displayName}
</th>
{/each}
{#each compareResults as r}<th
class="px-4 py-2.5 text-left text-xs font-medium text-muted-foreground"
>{r.displayName}</th
>{/each}
</tr>
</thead>
<tbody>
<tr class="border-b border-border">
<td class="px-4 py-2 text-muted-foreground">Latenz</td>
{#each compareResults as r}
<td class="px-4 py-2 font-mono {r.error ? 'text-red-400' : 'text-foreground'}">
{r.error ? 'Fehler' : `${(r.latencyMs / 1000).toFixed(1)}s`}
</td>
{/each}
{#each compareResults as r}<td
class="px-4 py-2 font-mono {r.error ? 'text-red-400' : 'text-foreground'}"
>{r.error ? 'Fehler' : `${(r.latencyMs / 1000).toFixed(1)}s`}</td
>{/each}
</tr>
<tr class="border-b border-border">
<td class="px-4 py-2 text-muted-foreground">Speed</td>
{#each compareResults as r}
<td class="px-4 py-2 font-mono text-foreground">
{r.error ? '—' : `${r.tokPerSec} tok/s`}
</td>
{/each}
{#each compareResults as r}<td class="px-4 py-2 font-mono text-foreground"
>{r.error ? '—' : `${r.tokPerSec} tok/s`}</td
>{/each}
</tr>
<tr class="border-b border-border">
<td class="px-4 py-2 text-muted-foreground">Prompt Tokens</td>
{#each compareResults as r}
<td class="px-4 py-2 font-mono text-foreground">
{r.error ? '—' : r.promptTokens}
</td>
{/each}
{#each compareResults as r}<td class="px-4 py-2 font-mono text-foreground"
>{r.error ? '—' : r.promptTokens}</td
>{/each}
</tr>
<tr>
<td class="px-4 py-2 text-muted-foreground">Completion Tokens</td>
{#each compareResults as r}
<td class="px-4 py-2 font-mono text-foreground">
{r.error ? '—' : r.completionTokens}
</td>
{/each}
{#each compareResults as r}<td class="px-4 py-2 font-mono text-foreground"
>{r.error ? '—' : r.completionTokens}</td
>{/each}
</tr>
</tbody>
</table>
</div>
<!-- Outputs side by side -->
<div
class="grid gap-4"
style="grid-template-columns: repeat({compareResults.length}, minmax(0, 1fr));"
@ -710,14 +941,10 @@
>
</div>
{#if r.error}
<div class="rounded-lg bg-red-500/10 p-3 text-sm text-red-400">
{r.error}
</div>
<div class="rounded-lg bg-red-500/10 p-3 text-sm text-red-400">{r.error}</div>
{:else}
<div
class="max-h-[50vh] overflow-y-auto whitespace-pre-wrap text-sm text-foreground"
>
{r.content}
<div class="llm-prose max-h-[50vh] overflow-y-auto text-sm text-foreground">
{@html renderMarkdown(r.content)}
</div>
{/if}
</div>
@ -726,5 +953,186 @@
{/if}
</div>
{/if}
<!-- Benchmark Tab -->
{#if activeTab === 'benchmark'}
<div class="flex flex-col gap-4">
<div class="rounded-xl border border-border bg-card p-4">
<p class="mb-3 text-sm text-muted-foreground">
Denselben Prompt N-mal gegen das geladene Modell laufen lassen, um Varianz zu messen.
</p>
{#if !isReady}
<div class="rounded-lg bg-yellow-500/10 p-3 text-sm text-yellow-400">
Lade zuerst ein Modell im Model-Controls-Bereich oben.
</div>
{:else}
<input
type="text"
bind:value={benchmarkSystemPrompt}
placeholder="System Prompt (optional)..."
class="mb-3 w-full rounded-lg border border-border bg-background px-3 py-2 text-sm text-foreground placeholder:text-muted-foreground focus:border-primary focus:outline-none"
/>
<textarea
bind:value={benchmarkPrompt}
placeholder="Prompt für den Benchmark...&#10;&#10;z.B.: Zähle von 1 bis 20."
rows={3}
disabled={benchmarkRunning}
class="w-full resize-none rounded-lg border border-border bg-background px-3 py-2 text-sm text-foreground placeholder:text-muted-foreground focus:border-primary focus:outline-none disabled:opacity-50"
></textarea>
<div class="mt-3 flex flex-wrap items-end gap-4">
<div class="flex flex-col gap-1">
<label for="bench-iters" class="text-xs text-muted-foreground">Iterationen</label>
<input
id="bench-iters"
type="number"
min="1"
max="50"
bind:value={benchmarkIterations}
disabled={benchmarkRunning}
class="w-24 rounded-lg border border-border bg-background px-3 py-1.5 text-sm text-foreground disabled:opacity-50"
/>
</div>
<div class="flex flex-col gap-1">
<label for="bench-temp" class="text-xs text-muted-foreground">Temperature</label>
<input
id="bench-temp"
type="number"
min="0"
max="2"
step="0.1"
bind:value={benchmarkTemperature}
disabled={benchmarkRunning}
class="w-24 rounded-lg border border-border bg-background px-3 py-1.5 text-sm text-foreground disabled:opacity-50"
/>
</div>
<div class="flex flex-col gap-1">
<label for="bench-tokens" class="text-xs text-muted-foreground">Max Tokens</label>
<input
id="bench-tokens"
type="number"
min="16"
max="2048"
step="16"
bind:value={benchmarkMaxTokens}
disabled={benchmarkRunning}
class="w-24 rounded-lg border border-border bg-background px-3 py-1.5 text-sm text-foreground disabled:opacity-50"
/>
</div>
<button
onclick={handleBenchmark}
disabled={!benchmarkPrompt.trim() || benchmarkRunning}
class="rounded-lg bg-primary px-5 py-2 text-sm font-medium text-primary-foreground disabled:opacity-50"
>
{#if benchmarkRunning}Run {benchmarkCurrentRun}/{benchmarkIterations}...{:else}Benchmark
starten{/if}
</button>
</div>
{/if}
</div>
{#if benchmarkRunning}
<div class="rounded-xl border border-yellow-500/30 bg-yellow-500/5 p-4">
<div class="mb-2 flex items-center gap-2">
<div class="h-2.5 w-2.5 animate-pulse rounded-full bg-yellow-500"></div>
<span class="text-sm font-medium text-foreground"
>{modelInfo.displayName} — Run {benchmarkCurrentRun}/{benchmarkIterations}</span
>
</div>
<div class="h-2 overflow-hidden rounded-full bg-muted">
<div
class="h-full rounded-full bg-primary transition-all duration-300"
style="width: {Math.round((benchmarkCurrentRun / benchmarkIterations) * 100)}%"
></div>
</div>
</div>
{/if}
{#if benchmarkStats}
<div class="grid grid-cols-2 gap-4">
<div class="rounded-xl border border-border bg-card p-4">
<div class="mb-2 text-xs font-medium text-muted-foreground">Latenz (ms)</div>
<div class="grid grid-cols-2 gap-y-2 text-sm">
<span class="text-muted-foreground">Durchschnitt</span><span
class="font-mono text-foreground">{benchmarkStats.avgLatency}</span
>
<span class="text-muted-foreground">Median</span><span
class="font-mono text-foreground">{benchmarkStats.medianLatency}</span
>
<span class="text-muted-foreground">Min</span><span class="font-mono text-green-500"
>{benchmarkStats.minLatency}</span
>
<span class="text-muted-foreground">Max</span><span class="font-mono text-red-400"
>{benchmarkStats.maxLatency}</span
>
</div>
</div>
<div class="rounded-xl border border-border bg-card p-4">
<div class="mb-2 text-xs font-medium text-muted-foreground">Speed (tok/s)</div>
<div class="grid grid-cols-2 gap-y-2 text-sm">
<span class="text-muted-foreground">Durchschnitt</span><span
class="font-mono text-foreground">{benchmarkStats.avgTokPerSec}</span
>
<span class="text-muted-foreground">Median</span><span
class="font-mono text-foreground">{benchmarkStats.medianTokPerSec}</span
>
<span class="text-muted-foreground">Max</span><span class="font-mono text-green-500"
>{benchmarkStats.maxTokPerSec}</span
>
<span class="text-muted-foreground">Min</span><span class="font-mono text-red-400"
>{benchmarkStats.minTokPerSec}</span
>
</div>
</div>
</div>
<div class="overflow-x-auto rounded-xl border border-border">
<table class="w-full text-sm">
<thead>
<tr class="border-b border-border bg-card">
<th class="px-4 py-2 text-left text-xs font-medium text-muted-foreground">Run</th>
<th class="px-4 py-2 text-left text-xs font-medium text-muted-foreground"
>Latenz</th
>
<th class="px-4 py-2 text-left text-xs font-medium text-muted-foreground"
>tok/s</th
>
<th class="px-4 py-2 text-left text-xs font-medium text-muted-foreground"
>Tokens</th
>
<th class="px-4 py-2 text-left text-xs font-medium text-muted-foreground"
>Verteilung</th
>
</tr>
</thead>
<tbody>
{#each benchmarkStats.runs as run}
{@const maxLat = benchmarkStats.maxLatency || 1}
<tr class="border-b border-border last:border-0">
<td class="px-4 py-2 font-mono text-muted-foreground">#{run.iteration}</td>
<td class="px-4 py-2 font-mono text-foreground"
>{(run.latencyMs / 1000).toFixed(2)}s</td
>
<td class="px-4 py-2 font-mono text-foreground">{run.tokPerSec}</td>
<td class="px-4 py-2 font-mono text-foreground">{run.completionTokens}</td>
<td class="px-4 py-2"
><div class="h-3 w-full rounded-full bg-muted">
<div
class="h-full rounded-full bg-primary/60"
style="width: {Math.round((run.latencyMs / maxLat) * 100)}%"
></div>
</div></td
>
</tr>
{/each}
</tbody>
</table>
</div>
<div class="text-xs text-muted-foreground">
Total: {benchmarkStats.totalTokens} tokens in {benchmarkStats.runs.length} runs ({modelInfo.displayName})
</div>
{/if}
</div>
{/if}
{/if}
</div>