From e5a6946d8bbed90a35d8e123eb31bbdb886c54ae Mon Sep 17 00:00:00 2001
From: Till JS
Date: Thu, 2 Apr 2026 11:25:44 +0200
Subject: [PATCH] feat(manacore/web): add model comparison tab to LLM test page

Add a "Compare" tab that sequentially runs the same prompt against all
available models (currently Qwen 2.5 1.5B and 0.5B), showing the results
side by side with a stats table (latency, tok/s, token counts) and a
streaming preview during inference.

Also includes earlier fixes: $derived.by for statusText, removal of the
unused generateText import, and chat auto-scroll with max-height.

Co-Authored-By: Claude Opus 4.6 (1M context)
---
 .../src/routes/(app)/llm-test/+page.svelte | 406 ++++++++++++++----
 1 file changed, 329 insertions(+), 77 deletions(-)

diff --git a/apps/manacore/apps/web/src/routes/(app)/llm-test/+page.svelte b/apps/manacore/apps/web/src/routes/(app)/llm-test/+page.svelte
index fd3362c54..6a9b0d70b 100644
--- a/apps/manacore/apps/web/src/routes/(app)/llm-test/+page.svelte
+++ b/apps/manacore/apps/web/src/routes/(app)/llm-test/+page.svelte
@@ -12,9 +12,11 @@
 	} from '@manacore/local-llm';
 	import { Robot, Trash, PaperPlaneRight } from '@manacore/shared-icons';
 
+	const modelKeys = Object.keys(MODELS) as ModelKey[];
+
 	// --- State ---
 	let selectedModel: ModelKey = $state('qwen-2.5-1.5b');
-	let activeTab: 'chat' | 'extract' | 'classify' = $state('chat');
+	let activeTab: 'chat' | 'extract' | 'classify' | 'compare' = $state('chat');
 
 	const supported = isLocalLlmSupported();
 	const status = getLocalLlmStatus();
@@ -42,6 +44,27 @@
 	let classifyResult = $state('');
 	let classifyLoading = $state(false);
 
+	// Compare tab
+	interface CompareResult {
+		model: ModelKey;
+		displayName: string;
+		content: string;
+		latencyMs: number;
+		promptTokens: number;
+		completionTokens: number;
+		tokPerSec: number;
+		error?: string;
+	}
+
+	let comparePrompt = $state('');
+	let compareSystemPrompt = $state('');
+	let compareTemperature = $state(0.7);
+	let compareMaxTokens = $state(1024);
+	let compareResults = $state<CompareResult[]>([]);
+	let compareRunning = $state(false);
+	let compareCurrentModel = $state<string | null>(null);
+	let compareStreamingContent = $state('');
+
 	// --- Derived ---
 	let isReady = $derived(status.current.state === 'ready');
 	let isLoading = $derived(
@@ -108,7 +131,6 @@
 		if (systemPrompt.trim()) {
 			msgs.push({ role: 'system', content: systemPrompt.trim() });
 		}
-		// Include conversation history
 		for (const m of messages) {
 			msgs.push({ role: m.role, content: m.content });
 		}
@@ -174,6 +196,75 @@
 		}
 	}
 
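+	// Sequential comparison: load each model, run the shared prompt, record
+	// stats, then unload before the next so only one model holds memory at a time.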
+	async function handleCompare() {
+		if (!comparePrompt.trim() || compareRunning) return;
+		compareRunning = true;
+		compareResults = [];
+		compareStreamingContent = '';
+
+		const msgs: { role: 'system' | 'user'; content: string }[] = [];
+		if (compareSystemPrompt.trim()) {
+			msgs.push({ role: 'system', content: compareSystemPrompt.trim() });
+		}
+		msgs.push({ role: 'user', content: comparePrompt.trim() });
+
+		for (const modelKey of modelKeys) {
+			compareCurrentModel = MODELS[modelKey].displayName;
+			compareStreamingContent = '';
+
+			try {
+				await loadLocalLlm(modelKey);
+
+				const result = await generate({
+					messages: msgs,
+					temperature: compareTemperature,
+					maxTokens: compareMaxTokens,
+					onToken: (token) => {
+						compareStreamingContent += token;
+					},
+				});
+
+				// Throughput: completion tokens per second, scaled up from ms.
+				const tokPerSec =
+					result.latencyMs > 0
+						? Math.round((result.usage.completion_tokens / result.latencyMs) * 1000)
+						: 0;
+
+				compareResults = [
+					...compareResults,
+					{
+						model: modelKey,
+						displayName: MODELS[modelKey].displayName,
+						content: result.content,
+						latencyMs: result.latencyMs,
+						promptTokens: result.usage.prompt_tokens,
+						completionTokens: result.usage.completion_tokens,
+						tokPerSec,
+					},
+				];
+			} catch (err) {
+				compareResults = [
+					...compareResults,
+					{
+						model: modelKey,
+						displayName: MODELS[modelKey].displayName,
+						content: '',
+						latencyMs: 0,
+						promptTokens: 0,
+						completionTokens: 0,
+						tokPerSec: 0,
+						error: err instanceof Error ? err.message : String(err),
+					},
+				];
+			}
+
+			await unloadLocalLlm();
+		}
+
+		compareCurrentModel = null;
+		compareStreamingContent = '';
+		compareRunning = false;
+	}
+
 	function handleClear() {
 		messages = [];
 		streamingContent = '';
@@ -194,7 +285,6 @@
-
 	<h1>Local LLM Test</h1>
 
@@ -202,7 +292,6 @@
 
-
 	{#if !supported}
 		<p>WebGPU nicht verfügbar</p>
 
@@ -212,79 +301,78 @@
 	{:else}
-		<div>
-			<button onclick={() => (selectedModel = 'qwen-2.5-1.5b')}>
-				{MODELS['qwen-2.5-1.5b'].displayName}
-			</button>
-			<button onclick={() => (selectedModel = 'qwen-2.5-0.5b')}>
-				{MODELS['qwen-2.5-0.5b'].displayName}
-			</button>
-			<div>
-				<span>Download: ~{modelInfo.downloadSizeMb} MB</span>
-				<span>RAM: ~{modelInfo.ramUsageMb} MB</span>
-			</div>
-			{#if isReady}
-				<button onclick={() => unloadLocalLlm()}>Unload</button>
-			{:else}
-				<button onclick={() => loadLocalLlm(selectedModel)} disabled={isLoading}>Load</button>
-			{/if}
-			<div>
-				{statusText}
-			</div>
-			{#if progress !== null}
-				<div>
-					<div style="width: {progress}%"></div>
-				</div>
-			{/if}
-		</div>
+		<div>
+			{#each Object.entries(MODELS) as [key, model]}
+				<button onclick={() => (selectedModel = key as ModelKey)} disabled={isLoading}>
+					{model.displayName}
+				</button>
+			{/each}
+			<div>
+				<span>Download: ~{modelInfo.downloadSizeMb} MB</span>
+				<span>RAM: ~{modelInfo.ramUsageMb} MB</span>
+			</div>
+			{#if isReady}
+				<button onclick={() => unloadLocalLlm()}>Unload</button>
+			{:else}
+				<button onclick={() => loadLocalLlm(selectedModel)} disabled={isLoading}>Load</button>
+			{/if}
+			<div>
+				{statusText}
+			</div>
+			{#if progress !== null}
+				<div>
+					<div style="width: {progress}%"></div>
+				</div>
+			{/if}
+		</div>
 
-		{#each [{ id: 'chat', label: 'Chat' }, { id: 'extract', label: 'JSON Extract' }, { id: 'classify', label: 'Classify' }] as tab}
+		{#each [{ id: 'chat', label: 'Chat' }, { id: 'extract', label: 'JSON Extract' }, { id: 'classify', label: 'Classify' }, { id: 'compare', label: 'Compare' }] as tab}
 			<button onclick={() => (activeTab = tab.id)}>{tab.label}</button>
 		{/each}
+
+		<!-- Compare tab -->
+		{#if activeTab === 'compare'}
+			<textarea bind:value={compareSystemPrompt} placeholder="System Prompt (optional)"></textarea>
+			<textarea bind:value={comparePrompt} placeholder="Prompt"></textarea>
+			<label>Temperature <input type="number" step="0.1" bind:value={compareTemperature} /></label>
+			<label>Max Tokens <input type="number" bind:value={compareMaxTokens} /></label>
+			<button onclick={handleCompare} disabled={compareRunning || !comparePrompt.trim()}>
+				Compare
+			</button>
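+
+			<!-- Live progress: current model, position in the queue, streamed tokens -->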
+			{#if compareRunning && compareCurrentModel}
+				<div>
+					<div>
+						<strong>{compareCurrentModel}</strong>
+						<span>({compareResults.length + 1}/{modelKeys.length})</span>
+					</div>
+					{#if compareStreamingContent}
+						<div>{compareStreamingContent}|</div>
+					{:else}
+						<div>{statusText}</div>
+					{/if}
+				</div>
+			{/if}
+
+			{#if compareResults.length > 0}
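+				<!-- Stats: latency, throughput, and token counts, one column per model -->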
+				<table>
+					<thead>
+						<tr>
+							<th>Metrik</th>
+							{#each compareResults as r}
+								<th>{r.displayName}</th>
+							{/each}
+						</tr>
+					</thead>
+					<tbody>
+						<tr>
+							<td>Latenz</td>
+							{#each compareResults as r}
+								<td>{r.error ? 'Fehler' : `${(r.latencyMs / 1000).toFixed(1)}s`}</td>
+							{/each}
+						</tr>
+						<tr>
+							<td>Speed</td>
+							{#each compareResults as r}
+								<td>{r.error ? '—' : `${r.tokPerSec} tok/s`}</td>
+							{/each}
+						</tr>
+						<tr>
+							<td>Prompt Tokens</td>
+							{#each compareResults as r}
+								<td>{r.error ? '—' : r.promptTokens}</td>
+							{/each}
+						</tr>
+						<tr>
+							<td>Completion Tokens</td>
+							{#each compareResults as r}
+								<td>{r.error ? '—' : r.completionTokens}</td>
+							{/each}
+						</tr>
+					</tbody>
+				</table>
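+
+				<!-- Full outputs, one card per model -->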
+				<div>
+					{#each compareResults as r}
+						<div>
+							<div>
+								<strong>{r.displayName}</strong>
+								<span>{(r.latencyMs / 1000).toFixed(1)}s</span>
+							</div>
+							{#if r.error}
+								<p>{r.error}</p>
+							{:else}
+								<p>{r.content}</p>
+							{/if}
+						</div>
+					{/each}
+				</div>
+			{/if}
+		{/if}
 	{/if}