mirror of
https://github.com/Memo-2023/mana-monorepo.git
synced 2026-05-14 20:01:09 +02:00
feat(manacore/web): add model comparison tab to LLM test page
Add a "Compare" tab that sequentially runs the same prompt against all available models (currently Qwen 2.5 1.5B and 0.5B), showing results side-by-side with a stats table (latency, tok/s, token counts) and streaming preview during inference. Also includes fixes from earlier: $derived.by for statusText, removed unused generateText import, added chat auto-scroll with max-height. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
a658822a40
commit
e5a6946d8b
1 changed file with 329 additions and 77 deletions
|
|
@ -12,9 +12,11 @@
|
|||
} from '@manacore/local-llm';
|
||||
import { Robot, Trash, PaperPlaneRight } from '@manacore/shared-icons';
|
||||
|
||||
const modelKeys = Object.keys(MODELS) as ModelKey[];
|
||||
|
||||
// --- State ---
|
||||
let selectedModel: ModelKey = $state('qwen-2.5-1.5b');
|
||||
let activeTab: 'chat' | 'extract' | 'classify' = $state('chat');
|
||||
let activeTab: 'chat' | 'extract' | 'classify' | 'compare' = $state('chat');
|
||||
const supported = isLocalLlmSupported();
|
||||
const status = getLocalLlmStatus();
|
||||
|
||||
|
|
@ -42,6 +44,27 @@
|
|||
let classifyResult = $state('');
|
||||
let classifyLoading = $state(false);
|
||||
|
||||
// Compare tab
|
||||
/**
 * Outcome of running the comparison prompt against a single model.
 *
 * One entry is appended to `compareResults` per model by `handleCompare`,
 * whether the run succeeded or failed.
 */
interface CompareResult {
	// --- Identity ---
	/** Key of the model in `MODELS`. */
	model: ModelKey;
	/** Human-readable model name, copied from `MODELS[model].displayName`. */
	displayName: string;

	// --- Output ---
	/** Generated text; empty string when the run failed. */
	content: string;
	/** Error message when loading or generation threw; absent on success. */
	error?: string;

	// --- Metrics (all 0 when the run failed) ---
	/** Latency in milliseconds as reported by `generate`. */
	latencyMs: number;
	/** Prompt token count reported by `generate`. */
	promptTokens: number;
	/** Completion token count reported by `generate`. */
	completionTokens: number;
	/** Completion tokens per second, rounded to an integer. */
	tokPerSec: number;
}
|
||||
|
||||
// Form inputs of the Compare tab (bound to the controls in the template).
let comparePrompt = $state('');
let compareSystemPrompt = $state('');
let compareTemperature = $state(0.7);
let compareMaxTokens = $state(1024);

// Run state of the comparison.
let compareRunning = $state(false);
// One entry per model that has finished (successfully or with an error).
let compareResults: CompareResult[] = $state([]);
// Display name of the model currently being processed, or null when idle.
// The generic form keeps the declared union from being narrowed to `null`.
let compareCurrentModel = $state<string | null>(null);
// Tokens streamed so far for the model currently being processed.
let compareStreamingContent = $state('');
|
||||
|
||||
// --- Derived ---
|
||||
let isReady = $derived(status.current.state === 'ready');
|
||||
let isLoading = $derived(
|
||||
|
|
@ -108,7 +131,6 @@
|
|||
if (systemPrompt.trim()) {
|
||||
msgs.push({ role: 'system', content: systemPrompt.trim() });
|
||||
}
|
||||
// Include conversation history
|
||||
for (const m of messages) {
|
||||
msgs.push({ role: m.role, content: m.content });
|
||||
}
|
||||
|
|
@ -174,6 +196,75 @@
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Runs the comparison prompt sequentially against every model in `modelKeys`.
 *
 * For each model: load it, generate a completion while streaming tokens into
 * `compareStreamingContent`, append a `CompareResult` to `compareResults`,
 * then unload the model again. A failure while loading or generating is
 * recorded as an error result for that model and the loop continues.
 *
 * Does nothing when the prompt is blank or a comparison is already running.
 *
 * A rejection from `unloadLocalLlm()` still propagates to the caller (as
 * before), but the UI state is now always reset so the tab cannot get stuck
 * in the "running" state.
 */
async function handleCompare() {
	const userPrompt = comparePrompt.trim();
	if (!userPrompt || compareRunning) return;

	compareRunning = true;
	compareResults = [];
	compareStreamingContent = '';

	const msgs: { role: 'system' | 'user'; content: string }[] = [];
	const systemText = compareSystemPrompt.trim();
	if (systemText) {
		msgs.push({ role: 'system', content: systemText });
	}
	msgs.push({ role: 'user', content: userPrompt });

	try {
		for (const modelKey of modelKeys) {
			const displayName = MODELS[modelKey].displayName;
			compareCurrentModel = displayName;
			compareStreamingContent = '';

			try {
				await loadLocalLlm(modelKey);

				const result = await generate({
					messages: msgs,
					temperature: compareTemperature,
					maxTokens: compareMaxTokens,
					onToken: (token) => {
						compareStreamingContent += token;
					},
				});

				// Guard against division by zero when no latency was measured.
				const tokPerSec =
					result.latencyMs > 0
						? Math.round((result.usage.completion_tokens / result.latencyMs) * 1000)
						: 0;

				compareResults = [
					...compareResults,
					{
						model: modelKey,
						displayName,
						content: result.content,
						latencyMs: result.latencyMs,
						promptTokens: result.usage.prompt_tokens,
						completionTokens: result.usage.completion_tokens,
						tokPerSec,
					},
				];
			} catch (err) {
				// Record the failure for this model and keep comparing the others.
				compareResults = [
					...compareResults,
					{
						model: modelKey,
						displayName,
						content: '',
						latencyMs: 0,
						promptTokens: 0,
						completionTokens: 0,
						tokPerSec: 0,
						error: err instanceof Error ? err.message : String(err),
					},
				];
			}

			// Unload before moving on to the next model.
			await unloadLocalLlm();
		}
	} finally {
		// Always release the UI, even if unloading a model rejected; otherwise
		// `compareRunning` would stay true and the form would remain disabled.
		compareCurrentModel = null;
		compareStreamingContent = '';
		compareRunning = false;
	}
}
|
||||
|
||||
function handleClear() {
|
||||
messages = [];
|
||||
streamingContent = '';
|
||||
|
|
@ -194,7 +285,6 @@
|
|||
</svelte:head>
|
||||
|
||||
<div class="mx-auto max-w-4xl">
|
||||
<!-- Header -->
|
||||
<header class="mb-6">
|
||||
<h1 class="text-2xl font-bold text-foreground">Local LLM Test</h1>
|
||||
<p class="mt-1 text-sm text-muted-foreground">
|
||||
|
|
@ -202,7 +292,6 @@
|
|||
</p>
|
||||
</header>
|
||||
|
||||
<!-- WebGPU Support Check -->
|
||||
{#if !supported}
|
||||
<div class="rounded-xl border border-red-500/30 bg-red-500/10 p-6 text-center">
|
||||
<p class="text-lg font-semibold text-red-400">WebGPU nicht verfügbar</p>
|
||||
|
|
@ -212,79 +301,78 @@
|
|||
</p>
|
||||
</div>
|
||||
{:else}
|
||||
<!-- Model Controls -->
|
||||
<div class="mb-6 rounded-xl border border-border bg-card p-4">
|
||||
<div class="flex flex-wrap items-center gap-4">
|
||||
<!-- Model Select -->
|
||||
<div class="flex flex-col gap-1">
|
||||
<label for="model-select" class="text-xs font-medium text-muted-foreground">Modell</label>
|
||||
<select
|
||||
id="model-select"
|
||||
bind:value={selectedModel}
|
||||
disabled={isLoading || isGenerating}
|
||||
class="rounded-lg border border-border bg-background px-3 py-1.5 text-sm text-foreground disabled:opacity-50"
|
||||
>
|
||||
{#each Object.entries(MODELS) as [key, model]}
|
||||
<option value={key}>{model.displayName}</option>
|
||||
{/each}
|
||||
</select>
|
||||
</div>
|
||||
|
||||
<!-- Model Info -->
|
||||
<div class="flex flex-col gap-0.5 text-xs text-muted-foreground">
|
||||
<span>Download: ~{modelInfo.downloadSizeMb} MB</span>
|
||||
<span>RAM: ~{modelInfo.ramUsageMb} MB</span>
|
||||
</div>
|
||||
|
||||
<!-- Load/Unload Button -->
|
||||
<div class="flex items-center gap-2">
|
||||
{#if isReady}
|
||||
<button
|
||||
onclick={handleUnload}
|
||||
class="rounded-lg border border-border px-4 py-1.5 text-sm text-muted-foreground transition-colors hover:bg-muted"
|
||||
<!-- Model Controls (hidden on Compare tab — it manages models itself) -->
|
||||
{#if activeTab !== 'compare'}
|
||||
<div class="mb-6 rounded-xl border border-border bg-card p-4">
|
||||
<div class="flex flex-wrap items-center gap-4">
|
||||
<div class="flex flex-col gap-1">
|
||||
<label for="model-select" class="text-xs font-medium text-muted-foreground"
|
||||
>Modell</label
|
||||
>
|
||||
Entladen
|
||||
</button>
|
||||
{:else}
|
||||
<button
|
||||
onclick={handleLoad}
|
||||
disabled={isLoading}
|
||||
class="rounded-lg bg-primary px-4 py-1.5 text-sm font-medium text-primary-foreground transition-opacity disabled:opacity-50"
|
||||
<select
|
||||
id="model-select"
|
||||
bind:value={selectedModel}
|
||||
disabled={isLoading || isGenerating}
|
||||
class="rounded-lg border border-border bg-background px-3 py-1.5 text-sm text-foreground disabled:opacity-50"
|
||||
>
|
||||
{isLoading ? 'Lädt...' : 'Modell laden'}
|
||||
</button>
|
||||
{/if}
|
||||
{#each Object.entries(MODELS) as [key, model]}
|
||||
<option value={key}>{model.displayName}</option>
|
||||
{/each}
|
||||
</select>
|
||||
</div>
|
||||
|
||||
<div class="flex flex-col gap-0.5 text-xs text-muted-foreground">
|
||||
<span>Download: ~{modelInfo.downloadSizeMb} MB</span>
|
||||
<span>RAM: ~{modelInfo.ramUsageMb} MB</span>
|
||||
</div>
|
||||
|
||||
<div class="flex items-center gap-2">
|
||||
{#if isReady}
|
||||
<button
|
||||
onclick={handleUnload}
|
||||
class="rounded-lg border border-border px-4 py-1.5 text-sm text-muted-foreground transition-colors hover:bg-muted"
|
||||
>
|
||||
Entladen
|
||||
</button>
|
||||
{:else}
|
||||
<button
|
||||
onclick={handleLoad}
|
||||
disabled={isLoading}
|
||||
class="rounded-lg bg-primary px-4 py-1.5 text-sm font-medium text-primary-foreground transition-opacity disabled:opacity-50"
|
||||
>
|
||||
{isLoading ? 'Lädt...' : 'Modell laden'}
|
||||
</button>
|
||||
{/if}
|
||||
</div>
|
||||
|
||||
<div class="ml-auto flex items-center gap-2">
|
||||
<div
|
||||
class="h-2.5 w-2.5 rounded-full {isReady
|
||||
? 'bg-green-500'
|
||||
: isLoading
|
||||
? 'animate-pulse bg-yellow-500'
|
||||
: status.current.state === 'error'
|
||||
? 'bg-red-500'
|
||||
: 'bg-muted-foreground/30'}"
|
||||
></div>
|
||||
<span class="text-xs text-muted-foreground">{statusText}</span>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Status -->
|
||||
<div class="ml-auto flex items-center gap-2">
|
||||
<div
|
||||
class="h-2.5 w-2.5 rounded-full {isReady
|
||||
? 'bg-green-500'
|
||||
: isLoading
|
||||
? 'bg-yellow-500 animate-pulse'
|
||||
: status.current.state === 'error'
|
||||
? 'bg-red-500'
|
||||
: 'bg-muted-foreground/30'}"
|
||||
></div>
|
||||
<span class="text-xs text-muted-foreground">{statusText}</span>
|
||||
</div>
|
||||
{#if progress !== null}
|
||||
<div class="mt-3 h-2 overflow-hidden rounded-full bg-muted">
|
||||
<div
|
||||
class="h-full rounded-full bg-primary transition-all duration-300"
|
||||
style="width: {Math.round(progress * 100)}%"
|
||||
></div>
|
||||
</div>
|
||||
{/if}
|
||||
</div>
|
||||
|
||||
<!-- Progress Bar -->
|
||||
{#if progress !== null}
|
||||
<div class="mt-3 h-2 overflow-hidden rounded-full bg-muted">
|
||||
<div
|
||||
class="h-full rounded-full bg-primary transition-all duration-300"
|
||||
style="width: {Math.round(progress * 100)}%"
|
||||
></div>
|
||||
</div>
|
||||
{/if}
|
||||
</div>
|
||||
{/if}
|
||||
|
||||
<!-- Tabs -->
|
||||
<div class="mb-4 flex gap-1 rounded-lg border border-border bg-card p-1">
|
||||
{#each [{ id: 'chat', label: 'Chat' }, { id: 'extract', label: 'JSON Extract' }, { id: 'classify', label: 'Classify' }] as tab}
|
||||
{#each [{ id: 'chat', label: 'Chat' }, { id: 'extract', label: 'JSON Extract' }, { id: 'classify', label: 'Classify' }, { id: 'compare', label: 'Compare' }] as tab}
|
||||
<button
|
||||
onclick={() => (activeTab = tab.id as typeof activeTab)}
|
||||
class="flex-1 rounded-md px-3 py-1.5 text-sm font-medium transition-colors {activeTab ===
|
||||
|
|
@ -300,7 +388,6 @@
|
|||
<!-- Chat Tab -->
|
||||
{#if activeTab === 'chat'}
|
||||
<div class="flex flex-col gap-4">
|
||||
<!-- System Prompt -->
|
||||
<input
|
||||
type="text"
|
||||
bind:value={systemPrompt}
|
||||
|
|
@ -308,7 +395,6 @@
|
|||
class="rounded-xl border border-border bg-card px-4 py-2.5 text-sm text-foreground placeholder:text-muted-foreground focus:border-primary focus:outline-none"
|
||||
/>
|
||||
|
||||
<!-- Messages -->
|
||||
<div
|
||||
bind:this={chatContainer}
|
||||
class="max-h-[60vh] min-h-[300px] space-y-3 overflow-y-auto rounded-xl border border-border bg-background/50 p-4"
|
||||
|
|
@ -351,23 +437,21 @@
|
|||
{/if}
|
||||
</div>
|
||||
|
||||
<!-- Stats -->
|
||||
{#if lastLatency !== null}
|
||||
<div class="flex gap-4 text-xs text-muted-foreground">
|
||||
<span>Latenz: {lastLatency}ms</span>
|
||||
{#if lastTokens}
|
||||
<span>Prompt: {lastTokens.prompt} tokens</span>
|
||||
<span>Completion: {lastTokens.completion} tokens</span>
|
||||
<span
|
||||
>Speed: {lastLatency > 0
|
||||
<span>
|
||||
Speed: {lastLatency > 0
|
||||
? Math.round((lastTokens.completion / lastLatency) * 1000)
|
||||
: 0} tok/s</span
|
||||
>
|
||||
: 0} tok/s
|
||||
</span>
|
||||
{/if}
|
||||
</div>
|
||||
{/if}
|
||||
|
||||
<!-- Input -->
|
||||
<div class="flex gap-3">
|
||||
<textarea
|
||||
bind:value={userInput}
|
||||
|
|
@ -474,5 +558,173 @@
|
|||
{/if}
|
||||
</div>
|
||||
{/if}
|
||||
|
||||
<!-- Compare Tab -->
|
||||
{#if activeTab === 'compare'}
|
||||
<div class="flex flex-col gap-4">
|
||||
<div class="rounded-xl border border-border bg-card p-4">
|
||||
<p class="mb-3 text-sm text-muted-foreground">
|
||||
Denselben Prompt sequentiell gegen alle {modelKeys.length} Modelle testen. Jedes Modell wird
|
||||
geladen, inferiert und wieder entladen — die Ergebnisse erscheinen nebeneinander.
|
||||
</p>
|
||||
|
||||
<input
|
||||
type="text"
|
||||
bind:value={compareSystemPrompt}
|
||||
placeholder="System Prompt (optional)..."
|
||||
class="mb-3 w-full rounded-lg border border-border bg-background px-3 py-2 text-sm text-foreground placeholder:text-muted-foreground focus:border-primary focus:outline-none"
|
||||
/>
|
||||
|
||||
<textarea
|
||||
bind:value={comparePrompt}
|
||||
placeholder="Prompt eingeben, der gegen alle Modelle getestet wird... z.B.: Erkläre Quantencomputing in 3 Sätzen."
|
||||
rows={4}
|
||||
disabled={compareRunning}
|
||||
class="w-full resize-none rounded-lg border border-border bg-background px-3 py-2 text-sm text-foreground placeholder:text-muted-foreground focus:border-primary focus:outline-none disabled:opacity-50"
|
||||
></textarea>
|
||||
|
||||
<div class="mt-3 flex flex-wrap items-end gap-4">
|
||||
<div class="flex flex-col gap-1">
|
||||
<label for="compare-temp" class="text-xs text-muted-foreground">Temperature</label>
|
||||
<input
|
||||
id="compare-temp"
|
||||
type="number"
|
||||
min="0"
|
||||
max="2"
|
||||
step="0.1"
|
||||
bind:value={compareTemperature}
|
||||
disabled={compareRunning}
|
||||
class="w-24 rounded-lg border border-border bg-background px-3 py-1.5 text-sm text-foreground disabled:opacity-50"
|
||||
/>
|
||||
</div>
|
||||
<div class="flex flex-col gap-1">
|
||||
<label for="compare-tokens" class="text-xs text-muted-foreground">Max Tokens</label>
|
||||
<input
|
||||
id="compare-tokens"
|
||||
type="number"
|
||||
min="64"
|
||||
max="4096"
|
||||
step="64"
|
||||
bind:value={compareMaxTokens}
|
||||
disabled={compareRunning}
|
||||
class="w-24 rounded-lg border border-border bg-background px-3 py-1.5 text-sm text-foreground disabled:opacity-50"
|
||||
/>
|
||||
</div>
|
||||
<button
|
||||
onclick={handleCompare}
|
||||
disabled={!comparePrompt.trim() || compareRunning}
|
||||
class="rounded-lg bg-primary px-5 py-2 text-sm font-medium text-primary-foreground disabled:opacity-50"
|
||||
>
|
||||
{compareRunning ? 'Läuft...' : `Alle ${modelKeys.length} Modelle vergleichen`}
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Running indicator -->
|
||||
{#if compareRunning && compareCurrentModel}
|
||||
<div class="rounded-xl border border-yellow-500/30 bg-yellow-500/5 p-4">
|
||||
<div class="mb-2 flex items-center gap-2">
|
||||
<div class="h-2.5 w-2.5 animate-pulse rounded-full bg-yellow-500"></div>
|
||||
<span class="text-sm font-medium text-foreground">{compareCurrentModel}</span>
|
||||
<span class="text-xs text-muted-foreground">
|
||||
({compareResults.length + 1}/{modelKeys.length})
|
||||
</span>
|
||||
</div>
|
||||
{#if compareStreamingContent}
|
||||
<div
|
||||
class="max-h-32 overflow-y-auto whitespace-pre-wrap text-sm text-muted-foreground"
|
||||
>
|
||||
{compareStreamingContent}<span class="animate-pulse">|</span>
|
||||
</div>
|
||||
{:else}
|
||||
<div class="text-xs text-muted-foreground">{statusText}</div>
|
||||
{/if}
|
||||
</div>
|
||||
{/if}
|
||||
|
||||
<!-- Results -->
|
||||
{#if compareResults.length > 0}
|
||||
<!-- Stats comparison table -->
|
||||
<div class="overflow-x-auto rounded-xl border border-border">
|
||||
<table class="w-full text-sm">
|
||||
<thead>
|
||||
<tr class="border-b border-border bg-card">
|
||||
<th class="px-4 py-2.5 text-left text-xs font-medium text-muted-foreground"
|
||||
>Metrik</th
|
||||
>
|
||||
{#each compareResults as r}
|
||||
<th class="px-4 py-2.5 text-left text-xs font-medium text-muted-foreground">
|
||||
{r.displayName}
|
||||
</th>
|
||||
{/each}
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr class="border-b border-border">
|
||||
<td class="px-4 py-2 text-muted-foreground">Latenz</td>
|
||||
{#each compareResults as r}
|
||||
<td class="px-4 py-2 font-mono {r.error ? 'text-red-400' : 'text-foreground'}">
|
||||
{r.error ? 'Fehler' : `${(r.latencyMs / 1000).toFixed(1)}s`}
|
||||
</td>
|
||||
{/each}
|
||||
</tr>
|
||||
<tr class="border-b border-border">
|
||||
<td class="px-4 py-2 text-muted-foreground">Speed</td>
|
||||
{#each compareResults as r}
|
||||
<td class="px-4 py-2 font-mono text-foreground">
|
||||
{r.error ? '—' : `${r.tokPerSec} tok/s`}
|
||||
</td>
|
||||
{/each}
|
||||
</tr>
|
||||
<tr class="border-b border-border">
|
||||
<td class="px-4 py-2 text-muted-foreground">Prompt Tokens</td>
|
||||
{#each compareResults as r}
|
||||
<td class="px-4 py-2 font-mono text-foreground">
|
||||
{r.error ? '—' : r.promptTokens}
|
||||
</td>
|
||||
{/each}
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="px-4 py-2 text-muted-foreground">Completion Tokens</td>
|
||||
{#each compareResults as r}
|
||||
<td class="px-4 py-2 font-mono text-foreground">
|
||||
{r.error ? '—' : r.completionTokens}
|
||||
</td>
|
||||
{/each}
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
|
||||
<!-- Outputs side by side -->
|
||||
<div
|
||||
class="grid gap-4"
|
||||
style="grid-template-columns: repeat({compareResults.length}, minmax(0, 1fr));"
|
||||
>
|
||||
{#each compareResults as r}
|
||||
<div class="rounded-xl border border-border bg-card p-4">
|
||||
<div class="mb-3 flex items-center justify-between">
|
||||
<span class="text-sm font-semibold text-foreground">{r.displayName}</span>
|
||||
<span class="text-xs text-muted-foreground"
|
||||
>{(r.latencyMs / 1000).toFixed(1)}s</span
|
||||
>
|
||||
</div>
|
||||
{#if r.error}
|
||||
<div class="rounded-lg bg-red-500/10 p-3 text-sm text-red-400">
|
||||
{r.error}
|
||||
</div>
|
||||
{:else}
|
||||
<div
|
||||
class="max-h-[50vh] overflow-y-auto whitespace-pre-wrap text-sm text-foreground"
|
||||
>
|
||||
{r.content}
|
||||
</div>
|
||||
{/if}
|
||||
</div>
|
||||
{/each}
|
||||
</div>
|
||||
{/if}
|
||||
</div>
|
||||
{/if}
|
||||
{/if}
|
||||
</div>
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue