mirror of
https://github.com/Memo-2023/mana-monorepo.git
synced 2026-05-14 20:01:09 +02:00
feat(manacore/web): add benchmark, compare history, markdown & cache status to LLM test
- Benchmark tab: run same prompt N times against loaded model, show avg/min/max/median for latency and tok/s with per-run table and bars - Compare history: save results to localStorage (max 20), restore or delete previous comparisons via History panel - Markdown rendering: use marked for assistant responses in chat tab and compare outputs with proper code blocks, lists, headings - Model cache status: check browser Cache API via WebLLM hasModelInCache, show cached/uncached per model with size badges - Add marked dependency Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
348b6ff231
commit
c3c02c6a22
2 changed files with 523 additions and 114 deletions
|
|
@ -42,9 +42,13 @@
|
|||
"vitest": "^4.0.14"
|
||||
},
|
||||
"dependencies": {
|
||||
"@calc/shared": "workspace:*",
|
||||
"@clock/shared": "workspace:*",
|
||||
"@manacore/credits": "workspace:^",
|
||||
"@manacore/feedback": "workspace:*",
|
||||
"@manacore/help": "workspace:*",
|
||||
"@manacore/local-llm": "workspace:*",
|
||||
"@manacore/local-store": "workspace:*",
|
||||
"@manacore/shared-links": "workspace:*",
|
||||
"@manacore/qr-export": "workspace:*",
|
||||
"@manacore/shared-auth": "workspace:*",
|
||||
"@manacore/shared-auth-stores": "workspace:*",
|
||||
|
|
@ -52,30 +56,27 @@
|
|||
"@manacore/shared-branding": "workspace:*",
|
||||
"@manacore/shared-config": "workspace:*",
|
||||
"@manacore/shared-error-tracking": "workspace:*",
|
||||
"@manacore/feedback": "workspace:*",
|
||||
"@manacore/shared-i18n": "workspace:*",
|
||||
"@manacore/help": "workspace:*",
|
||||
"@manacore/shared-icons": "workspace:*",
|
||||
"@manacore/shared-links": "workspace:*",
|
||||
"@manacore/shared-profile-ui": "workspace:*",
|
||||
"@manacore/shared-stores": "workspace:*",
|
||||
"@manacore/shared-tags": "workspace:*",
|
||||
"@manacore/subscriptions": "workspace:*",
|
||||
"@manacore/shared-tailwind": "workspace:*",
|
||||
"@manacore/shared-theme": "workspace:*",
|
||||
"@manacore/shared-theme-ui": "workspace:*",
|
||||
"@manacore/shared-types": "workspace:*",
|
||||
"@manacore/shared-uload": "workspace:*",
|
||||
"@manacore/shared-ui": "workspace:*",
|
||||
"@manacore/shared-uload": "workspace:*",
|
||||
"@manacore/shared-utils": "workspace:*",
|
||||
"@manacore/spiral-db": "workspace:*",
|
||||
"@manacore/subscriptions": "workspace:*",
|
||||
"@manacore/wallpaper-generator": "workspace:*",
|
||||
"@calc/shared": "workspace:*",
|
||||
"@clock/shared": "workspace:*",
|
||||
"@zitare/content": "workspace:*",
|
||||
"date-fns": "^4.1.0",
|
||||
"dexie": "^4.0.11",
|
||||
"marked": "^17.0.5",
|
||||
"svelte-dnd-action": "^0.9.68",
|
||||
"leaflet": "^1.9.4",
|
||||
"svelte-i18n": "^4.0.0"
|
||||
},
|
||||
"type": "module"
|
||||
|
|
|
|||
|
|
@ -10,13 +10,40 @@
|
|||
MODELS,
|
||||
type ModelKey,
|
||||
} from '@manacore/local-llm';
|
||||
import { Robot, Trash, PaperPlaneRight } from '@manacore/shared-icons';
|
||||
import { marked } from 'marked';
|
||||
import { Robot, Trash, PaperPlaneRight, ClockCounterClockwise } from '@manacore/shared-icons';
|
||||
|
||||
const modelKeys = Object.keys(MODELS) as ModelKey[];
|
||||
|
||||
// --- Markdown rendering ---
|
||||
marked.setOptions({ breaks: true, gfm: true });
|
||||
|
||||
/**
 * Convert a Markdown string into an HTML string with `marked`, forcing the
 * synchronous code path so the result can be used directly in the template.
 *
 * NOTE(review): the returned HTML is inserted with `{@html ...}` by the template
 * and is not sanitized here — confirm that model output may be treated as
 * trusted, or sanitize it before rendering.
 *
 * @param text Markdown source (chat messages, streaming content, compare outputs).
 * @returns The HTML produced by `marked.parse`.
 */
function renderMarkdown(text: string): string {
	const html = marked.parse(text, { async: false });
	return html as string;
}
|
||||
|
||||
// --- Model cache status ---
|
||||
let modelCacheStatus = $state<Record<string, boolean>>({});
|
||||
|
||||
/**
 * Refresh `modelCacheStatus` with, for every entry of `MODELS`, whether WebLLM
 * reports that model as present in the browser cache.
 *
 * Does nothing when the Cache API global (`caches`) is unavailable. Any failure
 * (WebLLM cannot be imported, or a single lookup throws) is recorded as
 * "not cached" for the affected model(s); the function itself never rejects.
 *
 * @returns A promise that resolves once every model's status has been written.
 */
async function checkModelCache(): Promise<void> {
	if (typeof caches === 'undefined') return;

	// Load WebLLM lazily and only once per refresh rather than once per model.
	let hasModelInCache: ((modelId: string) => Promise<boolean>) | undefined;
	try {
		({ hasModelInCache } = await import('@mlc-ai/web-llm'));
	} catch {
		hasModelInCache = undefined;
	}

	// The lookups are independent of each other, so run them in parallel.
	const lookups = Object.entries(MODELS).map(
		async ([key, config]): Promise<[string, boolean]> => {
			if (!hasModelInCache) return [key, false];
			try {
				return [key, await hasModelInCache(config.modelId)];
			} catch {
				return [key, false];
			}
		}
	);

	for (const [key, cached] of await Promise.all(lookups)) {
		modelCacheStatus[key] = cached;
	}
}
|
||||
|
||||
// Kick off the initial cache lookup only when a `window` global exists
// (presumably to skip it during server-side rendering — TODO confirm).
if (typeof window !== 'undefined') void checkModelCache();
|
||||
|
||||
// --- State ---
|
||||
let selectedModel: ModelKey = $state('qwen-2.5-1.5b');
|
||||
let activeTab: 'chat' | 'extract' | 'classify' | 'compare' = $state('chat');
|
||||
let activeTab: 'chat' | 'extract' | 'classify' | 'compare' | 'benchmark' = $state('chat');
|
||||
const supported = isLocalLlmSupported();
|
||||
const status = getLocalLlmStatus();
|
||||
|
||||
|
|
@ -56,6 +83,16 @@
|
|||
error?: string;
|
||||
}
|
||||
|
||||
/**
 * One saved run of the Compare tab, as persisted to `localStorage` under
 * the `llm-compare-history` key.
 */
interface CompareHistoryEntry {
	/** Unique identifier of the entry (generated with `crypto.randomUUID()`). */
	id: string;
	/** Creation time in milliseconds since the Unix epoch (`Date.now()`). */
	timestamp: number;
	/** User prompt that was sent to every model. */
	prompt: string;
	/** System prompt used for the run; may be an empty string. */
	systemPrompt: string;
	/** Sampling temperature used for the run. */
	temperature: number;
	/** Maximum number of tokens requested per model. */
	maxTokens: number;
	/** Per-model results collected during the run. */
	results: CompareResult[];
}
|
||||
|
||||
let comparePrompt = $state('');
|
||||
let compareSystemPrompt = $state('');
|
||||
let compareTemperature = $state(0.7);
|
||||
|
|
@ -64,6 +101,79 @@
|
|||
let compareRunning = $state(false);
|
||||
let compareCurrentModel = $state<string | null>(null);
|
||||
let compareStreamingContent = $state('');
|
||||
let compareHistory = $state<CompareHistoryEntry[]>([]);
|
||||
let showHistory = $state(false);
|
||||
|
||||
/**
 * Runtime check that a value parsed from storage has the fields the history
 * panel relies on (`id`, `timestamp`, `prompt`, `results`).
 *
 * @param value Arbitrary value taken from the parsed JSON array.
 * @returns `true` when the value is usable as a `CompareHistoryEntry`.
 */
function isCompareHistoryEntry(value: unknown): value is CompareHistoryEntry {
	if (typeof value !== 'object' || value === null) return false;
	const candidate = value as Record<string, unknown>;
	return (
		typeof candidate.id === 'string' &&
		typeof candidate.timestamp === 'number' &&
		typeof candidate.prompt === 'string' &&
		Array.isArray(candidate.results)
	);
}

/**
 * Load the saved compare history from `localStorage` into `compareHistory`.
 *
 * Best effort: missing, unparsable, or non-array data leaves the current
 * history untouched, and malformed individual entries are dropped, so the
 * history panel never receives data it cannot render.
 */
function loadCompareHistory(): void {
	try {
		const stored = localStorage.getItem('llm-compare-history');
		if (!stored) return;

		const parsed: unknown = JSON.parse(stored);
		if (!Array.isArray(parsed)) return;

		compareHistory = parsed.filter(isCompareHistoryEntry);
	} catch (error: unknown) {
		// Storage may be unavailable or hold corrupt JSON; history is optional.
		console.warn('Could not load LLM compare history', error);
	}
}
|
||||
|
||||
/**
 * Persist the current `compareHistory` to `localStorage` under the
 * `llm-compare-history` key.
 *
 * Best effort: a failure (for example storage being unavailable or full) is
 * reported as a console warning and never thrown to the caller.
 */
function saveCompareHistory(): void {
	try {
		localStorage.setItem('llm-compare-history', JSON.stringify(compareHistory));
	} catch (error: unknown) {
		console.warn('Could not persist LLM compare history', error);
	}
}
|
||||
|
||||
/**
 * Remove the history entry with the given id and persist the shortened list.
 *
 * @param id Identifier of the entry to drop; unknown ids leave the list as is.
 */
function deleteHistoryEntry(id: string) {
	const remaining = compareHistory.filter((item) => item.id !== id);
	compareHistory = remaining;
	saveCompareHistory();
}
|
||||
|
||||
/**
 * Copy a saved comparison back into the Compare tab's form fields and result
 * list, then close the history panel.
 *
 * @param entry The history entry to restore.
 */
function restoreHistoryEntry(entry: CompareHistoryEntry) {
	const { prompt, systemPrompt, temperature, maxTokens, results } = entry;

	// Form inputs.
	comparePrompt = prompt;
	compareSystemPrompt = systemPrompt;
	compareTemperature = temperature;
	compareMaxTokens = maxTokens;

	// Previously recorded per-model results.
	compareResults = results;

	showHistory = false;
}
|
||||
|
||||
// Read the persisted history only when a `window` global exists
// (presumably to skip it during server-side rendering — TODO confirm).
if (typeof window !== 'undefined') loadCompareHistory();
|
||||
|
||||
// Benchmark tab
|
||||
/** Measurements of a single benchmark iteration. */
interface BenchmarkRun {
	/** 1-based index of the iteration within the benchmark. */
	iteration: number;
	/** Latency reported for the generation, in milliseconds; 0 for a failed run. */
	latencyMs: number;
	/** Completion tokens per second, rounded to an integer; 0 for a failed run. */
	tokPerSec: number;
	/** Number of completion tokens generated; 0 for a failed run. */
	completionTokens: number;
}
|
||||
|
||||
/**
 * Aggregate result of a benchmark: the raw runs plus summary statistics.
 * Latency and speed statistics are 0 when no run produced a positive value.
 */
interface BenchmarkStats {
	/** Every iteration in execution order, including failed ones. */
	runs: BenchmarkRun[];

	/** Mean latency in milliseconds, rounded. */
	avgLatency: number;
	/** Smallest latency in milliseconds. */
	minLatency: number;
	/** Largest latency in milliseconds. */
	maxLatency: number;
	/** Median latency in milliseconds. */
	medianLatency: number;

	/** Mean throughput in tokens per second, rounded. */
	avgTokPerSec: number;
	/** Lowest throughput in tokens per second. */
	minTokPerSec: number;
	/** Highest throughput in tokens per second. */
	maxTokPerSec: number;
	/** Median throughput in tokens per second. */
	medianTokPerSec: number;

	/** Sum of completion tokens over all runs. */
	totalTokens: number;
}
|
||||
|
||||
let benchmarkPrompt = $state('');
|
||||
let benchmarkSystemPrompt = $state('');
|
||||
let benchmarkIterations = $state(5);
|
||||
let benchmarkTemperature = $state(0.7);
|
||||
let benchmarkMaxTokens = $state(256);
|
||||
let benchmarkRunning = $state(false);
|
||||
let benchmarkCurrentRun = $state(0);
|
||||
let benchmarkStats = $state<BenchmarkStats | null>(null);
|
||||
|
||||
/**
 * Median of a list of numbers.
 *
 * The input is not mutated. For an even number of values the mean of the two
 * middle values is returned, rounded to the nearest integer. An empty list
 * yields 0 (instead of `NaN`), matching the "no data" value used by callers.
 *
 * @param arr Values to summarize, in any order.
 * @returns The median, or 0 when `arr` is empty.
 */
function median(arr: number[]): number {
	if (arr.length === 0) return 0;

	const sorted = [...arr].sort((a, b) => a - b);
	const mid = Math.floor(sorted.length / 2);

	if (sorted.length % 2 !== 0) return sorted[mid];
	return Math.round((sorted[mid - 1] + sorted[mid]) / 2);
}
|
||||
|
||||
// --- Derived ---
|
||||
let isReady = $derived(status.current.state === 'ready');
|
||||
|
|
@ -95,7 +205,6 @@
|
|||
|
||||
let modelInfo = $derived(MODELS[selectedModel]);
|
||||
|
||||
// Auto-scroll chat to bottom on new messages/streaming
|
||||
$effect(() => {
|
||||
messages.length;
|
||||
streamingContent;
|
||||
|
|
@ -107,6 +216,7 @@
|
|||
// --- Actions ---
|
||||
/**
 * Load the currently selected model, then trigger a refresh of the per-model
 * cache status without waiting for that refresh to finish.
 */
async function handleLoad() {
	const target = selectedModel;
	await loadLocalLlm(target);
	void checkModelCache();
}
|
||||
|
||||
async function handleUnload() {
|
||||
|
|
@ -119,7 +229,6 @@
|
|||
|
||||
async function handleSend() {
|
||||
if (!userInput.trim() || isGenerating) return;
|
||||
|
||||
const userMsg = userInput.trim();
|
||||
messages = [...messages, { role: 'user', content: userMsg }];
|
||||
userInput = '';
|
||||
|
|
@ -128,12 +237,8 @@
|
|||
|
||||
try {
|
||||
const msgs: { role: 'system' | 'user' | 'assistant'; content: string }[] = [];
|
||||
if (systemPrompt.trim()) {
|
||||
msgs.push({ role: 'system', content: systemPrompt.trim() });
|
||||
}
|
||||
for (const m of messages) {
|
||||
msgs.push({ role: m.role, content: m.content });
|
||||
}
|
||||
if (systemPrompt.trim()) msgs.push({ role: 'system', content: systemPrompt.trim() });
|
||||
for (const m of messages) msgs.push({ role: m.role, content: m.content });
|
||||
|
||||
const result = await generate({
|
||||
messages: msgs,
|
||||
|
|
@ -203,18 +308,15 @@
|
|||
compareStreamingContent = '';
|
||||
|
||||
const msgs: { role: 'system' | 'user'; content: string }[] = [];
|
||||
if (compareSystemPrompt.trim()) {
|
||||
if (compareSystemPrompt.trim())
|
||||
msgs.push({ role: 'system', content: compareSystemPrompt.trim() });
|
||||
}
|
||||
msgs.push({ role: 'user', content: comparePrompt.trim() });
|
||||
|
||||
for (const modelKey of modelKeys) {
|
||||
compareCurrentModel = MODELS[modelKey].displayName;
|
||||
compareStreamingContent = '';
|
||||
|
||||
try {
|
||||
await loadLocalLlm(modelKey);
|
||||
|
||||
const result = await generate({
|
||||
messages: msgs,
|
||||
temperature: compareTemperature,
|
||||
|
|
@ -223,12 +325,10 @@
|
|||
compareStreamingContent += token;
|
||||
},
|
||||
});
|
||||
|
||||
const tokPerSec =
|
||||
result.latencyMs > 0
|
||||
? Math.round((result.usage.completion_tokens / result.latencyMs) * 1000)
|
||||
: 0;
|
||||
|
||||
compareResults = [
|
||||
...compareResults,
|
||||
{
|
||||
|
|
@ -256,13 +356,81 @@
|
|||
},
|
||||
];
|
||||
}
|
||||
|
||||
await unloadLocalLlm();
|
||||
}
|
||||
|
||||
const entry: CompareHistoryEntry = {
|
||||
id: crypto.randomUUID(),
|
||||
timestamp: Date.now(),
|
||||
prompt: comparePrompt,
|
||||
systemPrompt: compareSystemPrompt,
|
||||
temperature: compareTemperature,
|
||||
maxTokens: compareMaxTokens,
|
||||
results: compareResults,
|
||||
};
|
||||
compareHistory = [entry, ...compareHistory].slice(0, 20);
|
||||
saveCompareHistory();
|
||||
|
||||
compareCurrentModel = null;
|
||||
compareStreamingContent = '';
|
||||
compareRunning = false;
|
||||
checkModelCache();
|
||||
}
|
||||
|
||||
/**
 * Build the summary statistics for a finished benchmark.
 *
 * Runs with a latency (or speed) of 0 — which is how failed iterations are
 * recorded — are excluded from the latency (or speed) statistics; every
 * statistic falls back to 0 when no usable value remains.
 *
 * @param runs All recorded iterations, including failed ones.
 * @returns The runs together with avg/min/max/median figures and the token total.
 */
function summarizeBenchmarkRuns(runs: BenchmarkRun[]): BenchmarkStats {
	const latencies = runs.map((r) => r.latencyMs).filter((ms) => ms > 0);
	const speeds = runs.map((r) => r.tokPerSec).filter((s) => s > 0);

	const mean = (values: number[]): number =>
		values.length ? Math.round(values.reduce((sum, v) => sum + v, 0) / values.length) : 0;
	const lowest = (values: number[]): number => (values.length ? Math.min(...values) : 0);
	const highest = (values: number[]): number => (values.length ? Math.max(...values) : 0);
	const middle = (values: number[]): number => (values.length ? median(values) : 0);

	return {
		runs,
		avgLatency: mean(latencies),
		minLatency: lowest(latencies),
		maxLatency: highest(latencies),
		medianLatency: middle(latencies),
		avgTokPerSec: mean(speeds),
		minTokPerSec: lowest(speeds),
		maxTokPerSec: highest(speeds),
		medianTokPerSec: middle(speeds),
		totalTokens: runs.reduce((sum, r) => sum + r.completionTokens, 0),
	};
}

/**
 * Run the benchmark prompt repeatedly against the loaded model and publish
 * the resulting statistics in `benchmarkStats`.
 *
 * Does nothing when the prompt is blank, a benchmark is already running, or
 * no model is ready. A failed iteration is recorded as a run with all-zero
 * measurements and does not abort the benchmark.
 */
async function handleBenchmark() {
	if (!benchmarkPrompt.trim() || benchmarkRunning || !isReady) return;
	benchmarkRunning = true;
	benchmarkStats = null;
	benchmarkCurrentRun = 0;

	try {
		// The number input can deliver an empty or out-of-range value; its
		// min/max attributes (1 and 50) do not constrain typed input.
		const requested = Math.floor(Number(benchmarkIterations));
		const iterations = Number.isFinite(requested) ? Math.min(50, Math.max(1, requested)) : 1;

		const msgs: { role: 'system' | 'user'; content: string }[] = [];
		if (benchmarkSystemPrompt.trim())
			msgs.push({ role: 'system', content: benchmarkSystemPrompt.trim() });
		msgs.push({ role: 'user', content: benchmarkPrompt.trim() });

		const runs: BenchmarkRun[] = [];
		for (let i = 0; i < iterations; i++) {
			benchmarkCurrentRun = i + 1;
			try {
				const result = await generate({
					messages: msgs,
					temperature: benchmarkTemperature,
					maxTokens: benchmarkMaxTokens,
				});
				const tokPerSec =
					result.latencyMs > 0
						? Math.round((result.usage.completion_tokens / result.latencyMs) * 1000)
						: 0;
				runs.push({
					iteration: i + 1,
					latencyMs: result.latencyMs,
					tokPerSec,
					completionTokens: result.usage.completion_tokens,
				});
			} catch {
				// Keep the failed iteration visible in the per-run list as zeros.
				runs.push({ iteration: i + 1, latencyMs: 0, tokPerSec: 0, completionTokens: 0 });
			}
		}

		benchmarkStats = summarizeBenchmarkRuns(runs);
	} finally {
		// Always release the running flag so the UI cannot get stuck disabled.
		benchmarkRunning = false;
	}
}
|
||||
|
||||
function handleClear() {
|
||||
|
|
@ -278,10 +446,29 @@
|
|||
handleSend();
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Format a timestamp as a short German-locale date and time
 * (two-digit day, month, hour and minute; no year or seconds).
 *
 * @param ts Time in milliseconds since the Unix epoch.
 * @returns The formatted string in the `de-DE` locale.
 */
function formatTime(ts: number): string {
	const shortDateTime: Intl.DateTimeFormatOptions = {
		day: '2-digit',
		month: '2-digit',
		hour: '2-digit',
		minute: '2-digit',
	};
	const moment = new Date(ts);
	return moment.toLocaleString('de-DE', shortDateTime);
}
|
||||
</script>
|
||||
|
||||
<svelte:head>
|
||||
<title>Local LLM Test - ManaCore</title>
|
||||
{@html `<style>
|
||||
.llm-prose { line-height: 1.6; }
|
||||
.llm-prose p { margin: 0.4em 0; }
|
||||
.llm-prose pre { background: var(--color-muted, #1e1e2e); border-radius: 0.5rem; padding: 0.75rem; overflow-x: auto; margin: 0.5em 0; }
|
||||
.llm-prose code { font-size: 0.85em; background: var(--color-muted, #1e1e2e); padding: 0.15em 0.3em; border-radius: 0.25rem; }
|
||||
.llm-prose pre code { background: none; padding: 0; }
|
||||
.llm-prose ul, .llm-prose ol { padding-left: 1.5em; margin: 0.4em 0; }
|
||||
.llm-prose h1, .llm-prose h2, .llm-prose h3 { margin: 0.6em 0 0.3em; font-weight: 600; }
|
||||
.llm-prose blockquote { border-left: 3px solid var(--color-border, #444); padding-left: 0.75em; margin: 0.4em 0; opacity: 0.8; }
|
||||
</style>`}
|
||||
</svelte:head>
|
||||
|
||||
<div class="mx-auto max-w-4xl">
|
||||
|
|
@ -296,12 +483,11 @@
|
|||
<div class="rounded-xl border border-red-500/30 bg-red-500/10 p-6 text-center">
|
||||
<p class="text-lg font-semibold text-red-400">WebGPU nicht verfügbar</p>
|
||||
<p class="mt-2 text-sm text-muted-foreground">
|
||||
Dieses Feature benötigt einen Browser mit WebGPU-Support (Chrome 113+, Edge 113+). Safari
|
||||
und Firefox haben experimentelle Unterstützung.
|
||||
Dieses Feature benötigt einen Browser mit WebGPU-Support (Chrome 113+, Edge 113+).
|
||||
</p>
|
||||
</div>
|
||||
{:else}
|
||||
<!-- Model Controls (hidden on Compare tab — it manages models itself) -->
|
||||
<!-- Model Controls (hidden on Compare tab) -->
|
||||
{#if activeTab !== 'compare'}
|
||||
<div class="mb-6 rounded-xl border border-border bg-card p-4">
|
||||
<div class="flex flex-wrap items-center gap-4">
|
||||
|
|
@ -316,35 +502,39 @@
|
|||
class="rounded-lg border border-border bg-background px-3 py-1.5 text-sm text-foreground disabled:opacity-50"
|
||||
>
|
||||
{#each Object.entries(MODELS) as [key, model]}
|
||||
<option value={key}>{model.displayName}</option>
|
||||
<option value={key}
|
||||
>{model.displayName}{modelCacheStatus[key] ? ' (cached)' : ''}</option
|
||||
>
|
||||
{/each}
|
||||
</select>
|
||||
</div>
|
||||
|
||||
<div class="flex flex-col gap-0.5 text-xs text-muted-foreground">
|
||||
<span>Download: ~{modelInfo.downloadSizeMb} MB</span>
|
||||
<span>RAM: ~{modelInfo.ramUsageMb} MB</span>
|
||||
{#if modelCacheStatus[selectedModel] !== undefined}
|
||||
<span
|
||||
class={modelCacheStatus[selectedModel] ? 'text-green-500' : 'text-muted-foreground'}
|
||||
>
|
||||
{modelCacheStatus[selectedModel] ? 'Im Cache' : 'Nicht im Cache'}
|
||||
</span>
|
||||
{/if}
|
||||
</div>
|
||||
|
||||
<div class="flex items-center gap-2">
|
||||
{#if isReady}
|
||||
<button
|
||||
onclick={handleUnload}
|
||||
class="rounded-lg border border-border px-4 py-1.5 text-sm text-muted-foreground transition-colors hover:bg-muted"
|
||||
>Entladen</button
|
||||
>
|
||||
Entladen
|
||||
</button>
|
||||
{:else}
|
||||
<button
|
||||
onclick={handleLoad}
|
||||
disabled={isLoading}
|
||||
class="rounded-lg bg-primary px-4 py-1.5 text-sm font-medium text-primary-foreground transition-opacity disabled:opacity-50"
|
||||
>{isLoading ? 'Lädt...' : 'Modell laden'}</button
|
||||
>
|
||||
{isLoading ? 'Lädt...' : 'Modell laden'}
|
||||
</button>
|
||||
{/if}
|
||||
</div>
|
||||
|
||||
<div class="ml-auto flex items-center gap-2">
|
||||
<div
|
||||
class="h-2.5 w-2.5 rounded-full {isReady
|
||||
|
|
@ -358,7 +548,6 @@
|
|||
<span class="text-xs text-muted-foreground">{statusText}</span>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{#if progress !== null}
|
||||
<div class="mt-3 h-2 overflow-hidden rounded-full bg-muted">
|
||||
<div
|
||||
|
|
@ -367,21 +556,39 @@
|
|||
></div>
|
||||
</div>
|
||||
{/if}
|
||||
<!-- Cache overview -->
|
||||
{#if Object.keys(modelCacheStatus).length > 0}
|
||||
<div class="mt-3 flex flex-wrap gap-2 border-t border-border pt-3">
|
||||
{#each modelKeys as key}
|
||||
<div
|
||||
class="flex items-center gap-1.5 rounded-md border border-border px-2.5 py-1 text-xs"
|
||||
>
|
||||
<div
|
||||
class="h-1.5 w-1.5 rounded-full {modelCacheStatus[key]
|
||||
? 'bg-green-500'
|
||||
: 'bg-muted-foreground/30'}"
|
||||
></div>
|
||||
<span class="text-muted-foreground">{MODELS[key].displayName}</span>
|
||||
{#if modelCacheStatus[key]}
|
||||
<span class="text-green-500">~{MODELS[key].downloadSizeMb} MB</span>
|
||||
{/if}
|
||||
</div>
|
||||
{/each}
|
||||
</div>
|
||||
{/if}
|
||||
</div>
|
||||
{/if}
|
||||
|
||||
<!-- Tabs -->
|
||||
<div class="mb-4 flex gap-1 rounded-lg border border-border bg-card p-1">
|
||||
{#each [{ id: 'chat', label: 'Chat' }, { id: 'extract', label: 'JSON Extract' }, { id: 'classify', label: 'Classify' }, { id: 'compare', label: 'Compare' }] as tab}
|
||||
{#each [{ id: 'chat', label: 'Chat' }, { id: 'extract', label: 'JSON Extract' }, { id: 'classify', label: 'Classify' }, { id: 'compare', label: 'Compare' }, { id: 'benchmark', label: 'Benchmark' }] as tab}
|
||||
<button
|
||||
onclick={() => (activeTab = tab.id as typeof activeTab)}
|
||||
class="flex-1 rounded-md px-3 py-1.5 text-sm font-medium transition-colors {activeTab ===
|
||||
tab.id
|
||||
? 'bg-primary text-primary-foreground'
|
||||
: 'text-muted-foreground hover:text-foreground'}"
|
||||
: 'text-muted-foreground hover:text-foreground'}">{tab.label}</button
|
||||
>
|
||||
{tab.label}
|
||||
</button>
|
||||
{/each}
|
||||
</div>
|
||||
|
||||
|
|
@ -394,7 +601,6 @@
|
|||
placeholder="System Prompt (optional)..."
|
||||
class="rounded-xl border border-border bg-card px-4 py-2.5 text-sm text-foreground placeholder:text-muted-foreground focus:border-primary focus:outline-none"
|
||||
/>
|
||||
|
||||
<div
|
||||
bind:this={chatContainer}
|
||||
class="max-h-[60vh] min-h-[300px] space-y-3 overflow-y-auto rounded-xl border border-border bg-background/50 p-4"
|
||||
|
|
@ -420,38 +626,41 @@
|
|||
<div class="mb-1 text-xs font-medium text-muted-foreground">
|
||||
{msg.role === 'user' ? 'Du' : modelInfo.displayName}
|
||||
</div>
|
||||
<div class="whitespace-pre-wrap text-sm text-foreground">{msg.content}</div>
|
||||
{#if msg.role === 'assistant'}
|
||||
<div class="llm-prose text-sm text-foreground">
|
||||
{@html renderMarkdown(msg.content)}
|
||||
</div>
|
||||
{:else}
|
||||
<div class="whitespace-pre-wrap text-sm text-foreground">{msg.content}</div>
|
||||
{/if}
|
||||
</div>
|
||||
{/each}
|
||||
|
||||
{#if streamingContent}
|
||||
<div class="mr-8 rounded-lg border border-border bg-card p-3">
|
||||
<div class="mb-1 text-xs font-medium text-muted-foreground">
|
||||
{modelInfo.displayName}
|
||||
</div>
|
||||
<div class="whitespace-pre-wrap text-sm text-foreground">
|
||||
{streamingContent}<span class="animate-pulse">|</span>
|
||||
<div class="llm-prose text-sm text-foreground">
|
||||
{@html renderMarkdown(streamingContent)}<span class="animate-pulse">|</span>
|
||||
</div>
|
||||
</div>
|
||||
{/if}
|
||||
{/if}
|
||||
</div>
|
||||
|
||||
{#if lastLatency !== null}
|
||||
<div class="flex gap-4 text-xs text-muted-foreground">
|
||||
<span>Latenz: {lastLatency}ms</span>
|
||||
{#if lastTokens}
|
||||
<span>Prompt: {lastTokens.prompt} tokens</span>
|
||||
<span>Completion: {lastTokens.completion} tokens</span>
|
||||
<span>
|
||||
Speed: {lastLatency > 0
|
||||
<span
|
||||
>Speed: {lastLatency > 0
|
||||
? Math.round((lastTokens.completion / lastLatency) * 1000)
|
||||
: 0} tok/s
|
||||
</span>
|
||||
: 0} tok/s</span
|
||||
>
|
||||
{/if}
|
||||
</div>
|
||||
{/if}
|
||||
|
||||
<div class="flex gap-3">
|
||||
<textarea
|
||||
bind:value={userInput}
|
||||
|
|
@ -466,15 +675,13 @@
|
|||
onclick={handleSend}
|
||||
disabled={!isReady || !userInput.trim() || isGenerating}
|
||||
class="rounded-xl bg-primary px-4 py-3 text-sm font-medium text-primary-foreground transition-opacity disabled:opacity-50"
|
||||
><PaperPlaneRight size={18} /></button
|
||||
>
|
||||
<PaperPlaneRight size={18} />
|
||||
</button>
|
||||
<button
|
||||
onclick={handleClear}
|
||||
class="rounded-xl border border-border px-4 py-3 text-sm text-muted-foreground transition-colors hover:bg-muted"
|
||||
><Trash size={18} /></button
|
||||
>
|
||||
<Trash size={18} />
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
|
@ -485,8 +692,7 @@
|
|||
<div class="flex flex-col gap-4">
|
||||
<div class="rounded-xl border border-border bg-card p-4">
|
||||
<p class="mb-3 text-sm text-muted-foreground">
|
||||
Extrahiere strukturiertes JSON aus beliebigem Text. Das LLM analysiert den Text und gibt
|
||||
ein JSON-Objekt zurück.
|
||||
Extrahiere strukturiertes JSON aus beliebigem Text.
|
||||
</p>
|
||||
<input
|
||||
type="text"
|
||||
|
|
@ -504,11 +710,9 @@
|
|||
onclick={handleExtract}
|
||||
disabled={!isReady || !extractText.trim() || extractLoading}
|
||||
class="mt-3 rounded-lg bg-primary px-4 py-2 text-sm font-medium text-primary-foreground disabled:opacity-50"
|
||||
>{extractLoading ? 'Extrahiere...' : 'JSON extrahieren'}</button
|
||||
>
|
||||
{extractLoading ? 'Extrahiere...' : 'JSON extrahieren'}
|
||||
</button>
|
||||
</div>
|
||||
|
||||
{#if extractResult}
|
||||
<div class="rounded-xl border border-border bg-card p-4">
|
||||
<div class="mb-2 text-xs font-medium text-muted-foreground">Ergebnis</div>
|
||||
|
|
@ -524,8 +728,7 @@
|
|||
<div class="flex flex-col gap-4">
|
||||
<div class="rounded-xl border border-border bg-card p-4">
|
||||
<p class="mb-3 text-sm text-muted-foreground">
|
||||
Klassifiziere Text in eine von mehreren Kategorien. Das LLM wählt die passendste
|
||||
Kategorie.
|
||||
Klassifiziere Text in eine von mehreren Kategorien.
|
||||
</p>
|
||||
<input
|
||||
type="text"
|
||||
|
|
@ -543,11 +746,9 @@
|
|||
onclick={handleClassify}
|
||||
disabled={!isReady || !classifyText.trim() || classifyLoading}
|
||||
class="mt-3 rounded-lg bg-primary px-4 py-2 text-sm font-medium text-primary-foreground disabled:opacity-50"
|
||||
>{classifyLoading ? 'Klassifiziere...' : 'Klassifizieren'}</button
|
||||
>
|
||||
{classifyLoading ? 'Klassifiziere...' : 'Klassifizieren'}
|
||||
</button>
|
||||
</div>
|
||||
|
||||
{#if classifyResult}
|
||||
<div class="rounded-xl border border-border bg-card p-4">
|
||||
<div class="mb-2 text-xs font-medium text-muted-foreground">Ergebnis</div>
|
||||
|
|
@ -563,10 +764,53 @@
|
|||
{#if activeTab === 'compare'}
|
||||
<div class="flex flex-col gap-4">
|
||||
<div class="rounded-xl border border-border bg-card p-4">
|
||||
<p class="mb-3 text-sm text-muted-foreground">
|
||||
Denselben Prompt sequentiell gegen alle {modelKeys.length} Modelle testen. Jedes Modell wird
|
||||
geladen, inferiert und wieder entladen — die Ergebnisse erscheinen nebeneinander.
|
||||
</p>
|
||||
<div class="mb-3 flex items-center justify-between">
|
||||
<p class="text-sm text-muted-foreground">
|
||||
Denselben Prompt gegen alle {modelKeys.length} Modelle testen.
|
||||
</p>
|
||||
{#if compareHistory.length > 0}
|
||||
<button
|
||||
onclick={() => (showHistory = !showHistory)}
|
||||
class="flex items-center gap-1.5 rounded-lg border border-border px-3 py-1 text-xs text-muted-foreground transition-colors hover:bg-muted"
|
||||
>
|
||||
<ClockCounterClockwise size={14} />
|
||||
History ({compareHistory.length})
|
||||
</button>
|
||||
{/if}
|
||||
</div>
|
||||
|
||||
{#if showHistory}
|
||||
<div
|
||||
class="mb-4 max-h-64 overflow-y-auto rounded-lg border border-border bg-background p-3"
|
||||
>
|
||||
{#each compareHistory as entry}
|
||||
<div
|
||||
class="flex items-center justify-between border-b border-border py-2 last:border-0"
|
||||
>
|
||||
<div class="min-w-0 flex-1">
|
||||
<div class="truncate text-sm text-foreground">{entry.prompt}</div>
|
||||
<div class="flex gap-3 text-xs text-muted-foreground">
|
||||
<span>{formatTime(entry.timestamp)}</span>
|
||||
<span>{entry.results.length} Modelle</span>
|
||||
<span>T={entry.temperature}</span>
|
||||
</div>
|
||||
</div>
|
||||
<div class="flex gap-1.5">
|
||||
<button
|
||||
onclick={() => restoreHistoryEntry(entry)}
|
||||
class="rounded px-2.5 py-1 text-xs text-primary hover:bg-primary/10"
|
||||
>Laden</button
|
||||
>
|
||||
<button
|
||||
onclick={() => deleteHistoryEntry(entry.id)}
|
||||
class="rounded px-2.5 py-1 text-xs text-red-400 hover:bg-red-500/10"
|
||||
>×</button
|
||||
>
|
||||
</div>
|
||||
</div>
|
||||
{/each}
|
||||
</div>
|
||||
{/if}
|
||||
|
||||
<input
|
||||
type="text"
|
||||
|
|
@ -574,15 +818,13 @@
|
|||
placeholder="System Prompt (optional)..."
|
||||
class="mb-3 w-full rounded-lg border border-border bg-background px-3 py-2 text-sm text-foreground placeholder:text-muted-foreground focus:border-primary focus:outline-none"
|
||||
/>
|
||||
|
||||
<textarea
|
||||
bind:value={comparePrompt}
|
||||
placeholder="Prompt eingeben, der gegen alle Modelle getestet wird... z.B.: Erkläre Quantencomputing in 3 Sätzen."
|
||||
placeholder="Prompt eingeben, der gegen alle Modelle getestet wird..."
|
||||
rows={4}
|
||||
disabled={compareRunning}
|
||||
class="w-full resize-none rounded-lg border border-border bg-background px-3 py-2 text-sm text-foreground placeholder:text-muted-foreground focus:border-primary focus:outline-none disabled:opacity-50"
|
||||
></textarea>
|
||||
|
||||
<div class="mt-3 flex flex-wrap items-end gap-4">
|
||||
<div class="flex flex-col gap-1">
|
||||
<label for="compare-temp" class="text-xs text-muted-foreground">Temperature</label>
|
||||
|
|
@ -614,21 +856,21 @@
|
|||
onclick={handleCompare}
|
||||
disabled={!comparePrompt.trim() || compareRunning}
|
||||
class="rounded-lg bg-primary px-5 py-2 text-sm font-medium text-primary-foreground disabled:opacity-50"
|
||||
>{compareRunning
|
||||
? 'Läuft...'
|
||||
: `Alle ${modelKeys.length} Modelle vergleichen`}</button
|
||||
>
|
||||
{compareRunning ? 'Läuft...' : `Alle ${modelKeys.length} Modelle vergleichen`}
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Running indicator -->
|
||||
{#if compareRunning && compareCurrentModel}
|
||||
<div class="rounded-xl border border-yellow-500/30 bg-yellow-500/5 p-4">
|
||||
<div class="mb-2 flex items-center gap-2">
|
||||
<div class="h-2.5 w-2.5 animate-pulse rounded-full bg-yellow-500"></div>
|
||||
<span class="text-sm font-medium text-foreground">{compareCurrentModel}</span>
|
||||
<span class="text-xs text-muted-foreground">
|
||||
({compareResults.length + 1}/{modelKeys.length})
|
||||
</span>
|
||||
<span class="text-xs text-muted-foreground"
|
||||
>({compareResults.length + 1}/{modelKeys.length})</span
|
||||
>
|
||||
</div>
|
||||
{#if compareStreamingContent}
|
||||
<div
|
||||
|
|
@ -642,9 +884,7 @@
|
|||
</div>
|
||||
{/if}
|
||||
|
||||
<!-- Results -->
|
||||
{#if compareResults.length > 0}
|
||||
<!-- Stats comparison table -->
|
||||
<div class="overflow-x-auto rounded-xl border border-border">
|
||||
<table class="w-full text-sm">
|
||||
<thead>
|
||||
|
|
@ -652,51 +892,42 @@
|
|||
<th class="px-4 py-2.5 text-left text-xs font-medium text-muted-foreground"
|
||||
>Metrik</th
|
||||
>
|
||||
{#each compareResults as r}
|
||||
<th class="px-4 py-2.5 text-left text-xs font-medium text-muted-foreground">
|
||||
{r.displayName}
|
||||
</th>
|
||||
{/each}
|
||||
{#each compareResults as r}<th
|
||||
class="px-4 py-2.5 text-left text-xs font-medium text-muted-foreground"
|
||||
>{r.displayName}</th
|
||||
>{/each}
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr class="border-b border-border">
|
||||
<td class="px-4 py-2 text-muted-foreground">Latenz</td>
|
||||
{#each compareResults as r}
|
||||
<td class="px-4 py-2 font-mono {r.error ? 'text-red-400' : 'text-foreground'}">
|
||||
{r.error ? 'Fehler' : `${(r.latencyMs / 1000).toFixed(1)}s`}
|
||||
</td>
|
||||
{/each}
|
||||
{#each compareResults as r}<td
|
||||
class="px-4 py-2 font-mono {r.error ? 'text-red-400' : 'text-foreground'}"
|
||||
>{r.error ? 'Fehler' : `${(r.latencyMs / 1000).toFixed(1)}s`}</td
|
||||
>{/each}
|
||||
</tr>
|
||||
<tr class="border-b border-border">
|
||||
<td class="px-4 py-2 text-muted-foreground">Speed</td>
|
||||
{#each compareResults as r}
|
||||
<td class="px-4 py-2 font-mono text-foreground">
|
||||
{r.error ? '—' : `${r.tokPerSec} tok/s`}
|
||||
</td>
|
||||
{/each}
|
||||
{#each compareResults as r}<td class="px-4 py-2 font-mono text-foreground"
|
||||
>{r.error ? '—' : `${r.tokPerSec} tok/s`}</td
|
||||
>{/each}
|
||||
</tr>
|
||||
<tr class="border-b border-border">
|
||||
<td class="px-4 py-2 text-muted-foreground">Prompt Tokens</td>
|
||||
{#each compareResults as r}
|
||||
<td class="px-4 py-2 font-mono text-foreground">
|
||||
{r.error ? '—' : r.promptTokens}
|
||||
</td>
|
||||
{/each}
|
||||
{#each compareResults as r}<td class="px-4 py-2 font-mono text-foreground"
|
||||
>{r.error ? '—' : r.promptTokens}</td
|
||||
>{/each}
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="px-4 py-2 text-muted-foreground">Completion Tokens</td>
|
||||
{#each compareResults as r}
|
||||
<td class="px-4 py-2 font-mono text-foreground">
|
||||
{r.error ? '—' : r.completionTokens}
|
||||
</td>
|
||||
{/each}
|
||||
{#each compareResults as r}<td class="px-4 py-2 font-mono text-foreground"
|
||||
>{r.error ? '—' : r.completionTokens}</td
|
||||
>{/each}
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
|
||||
<!-- Outputs side by side -->
|
||||
<div
|
||||
class="grid gap-4"
|
||||
style="grid-template-columns: repeat({compareResults.length}, minmax(0, 1fr));"
|
||||
|
|
@ -710,14 +941,10 @@
|
|||
>
|
||||
</div>
|
||||
{#if r.error}
|
||||
<div class="rounded-lg bg-red-500/10 p-3 text-sm text-red-400">
|
||||
{r.error}
|
||||
</div>
|
||||
<div class="rounded-lg bg-red-500/10 p-3 text-sm text-red-400">{r.error}</div>
|
||||
{:else}
|
||||
<div
|
||||
class="max-h-[50vh] overflow-y-auto whitespace-pre-wrap text-sm text-foreground"
|
||||
>
|
||||
{r.content}
|
||||
<div class="llm-prose max-h-[50vh] overflow-y-auto text-sm text-foreground">
|
||||
{@html renderMarkdown(r.content)}
|
||||
</div>
|
||||
{/if}
|
||||
</div>
|
||||
|
|
@ -726,5 +953,186 @@
|
|||
{/if}
|
||||
</div>
|
||||
{/if}
|
||||
|
||||
<!-- Benchmark Tab -->
|
||||
{#if activeTab === 'benchmark'}
|
||||
<div class="flex flex-col gap-4">
|
||||
<div class="rounded-xl border border-border bg-card p-4">
|
||||
<p class="mb-3 text-sm text-muted-foreground">
|
||||
Denselben Prompt N-mal gegen das geladene Modell laufen lassen, um Varianz zu messen.
|
||||
</p>
|
||||
|
||||
{#if !isReady}
|
||||
<div class="rounded-lg bg-yellow-500/10 p-3 text-sm text-yellow-400">
|
||||
Lade zuerst ein Modell im Model-Controls-Bereich oben.
|
||||
</div>
|
||||
{:else}
|
||||
<input
|
||||
type="text"
|
||||
bind:value={benchmarkSystemPrompt}
|
||||
placeholder="System Prompt (optional)..."
|
||||
class="mb-3 w-full rounded-lg border border-border bg-background px-3 py-2 text-sm text-foreground placeholder:text-muted-foreground focus:border-primary focus:outline-none"
|
||||
/>
|
||||
<textarea
|
||||
bind:value={benchmarkPrompt}
|
||||
placeholder="Prompt für den Benchmark... z.B.: Zähle von 1 bis 20."
|
||||
rows={3}
|
||||
disabled={benchmarkRunning}
|
||||
class="w-full resize-none rounded-lg border border-border bg-background px-3 py-2 text-sm text-foreground placeholder:text-muted-foreground focus:border-primary focus:outline-none disabled:opacity-50"
|
||||
></textarea>
|
||||
<div class="mt-3 flex flex-wrap items-end gap-4">
|
||||
<div class="flex flex-col gap-1">
|
||||
<label for="bench-iters" class="text-xs text-muted-foreground">Iterationen</label>
|
||||
<input
|
||||
id="bench-iters"
|
||||
type="number"
|
||||
min="1"
|
||||
max="50"
|
||||
bind:value={benchmarkIterations}
|
||||
disabled={benchmarkRunning}
|
||||
class="w-24 rounded-lg border border-border bg-background px-3 py-1.5 text-sm text-foreground disabled:opacity-50"
|
||||
/>
|
||||
</div>
|
||||
<div class="flex flex-col gap-1">
|
||||
<label for="bench-temp" class="text-xs text-muted-foreground">Temperature</label>
|
||||
<input
|
||||
id="bench-temp"
|
||||
type="number"
|
||||
min="0"
|
||||
max="2"
|
||||
step="0.1"
|
||||
bind:value={benchmarkTemperature}
|
||||
disabled={benchmarkRunning}
|
||||
class="w-24 rounded-lg border border-border bg-background px-3 py-1.5 text-sm text-foreground disabled:opacity-50"
|
||||
/>
|
||||
</div>
|
||||
<div class="flex flex-col gap-1">
|
||||
<label for="bench-tokens" class="text-xs text-muted-foreground">Max Tokens</label>
|
||||
<input
|
||||
id="bench-tokens"
|
||||
type="number"
|
||||
min="16"
|
||||
max="2048"
|
||||
step="16"
|
||||
bind:value={benchmarkMaxTokens}
|
||||
disabled={benchmarkRunning}
|
||||
class="w-24 rounded-lg border border-border bg-background px-3 py-1.5 text-sm text-foreground disabled:opacity-50"
|
||||
/>
|
||||
</div>
|
||||
<button
|
||||
onclick={handleBenchmark}
|
||||
disabled={!benchmarkPrompt.trim() || benchmarkRunning}
|
||||
class="rounded-lg bg-primary px-5 py-2 text-sm font-medium text-primary-foreground disabled:opacity-50"
|
||||
>
|
||||
{#if benchmarkRunning}Run {benchmarkCurrentRun}/{benchmarkIterations}...{:else}Benchmark
|
||||
starten{/if}
|
||||
</button>
|
||||
</div>
|
||||
{/if}
|
||||
</div>
|
||||
|
||||
{#if benchmarkRunning}
|
||||
<div class="rounded-xl border border-yellow-500/30 bg-yellow-500/5 p-4">
|
||||
<div class="mb-2 flex items-center gap-2">
|
||||
<div class="h-2.5 w-2.5 animate-pulse rounded-full bg-yellow-500"></div>
|
||||
<span class="text-sm font-medium text-foreground"
|
||||
>{modelInfo.displayName} — Run {benchmarkCurrentRun}/{benchmarkIterations}</span
|
||||
>
|
||||
</div>
|
||||
<div class="h-2 overflow-hidden rounded-full bg-muted">
|
||||
<div
|
||||
class="h-full rounded-full bg-primary transition-all duration-300"
|
||||
style="width: {benchmarkIterations > 0
	? Math.min(100, Math.max(0, Math.round((benchmarkCurrentRun / benchmarkIterations) * 100)))
	: 0}%"
|
||||
></div>
|
||||
</div>
|
||||
</div>
|
||||
{/if}
|
||||
|
||||
{#if benchmarkStats}
|
||||
<div class="grid grid-cols-2 gap-4">
|
||||
<div class="rounded-xl border border-border bg-card p-4">
|
||||
<div class="mb-2 text-xs font-medium text-muted-foreground">Latenz (ms)</div>
|
||||
<div class="grid grid-cols-2 gap-y-2 text-sm">
|
||||
<span class="text-muted-foreground">Durchschnitt</span><span
|
||||
class="font-mono text-foreground">{benchmarkStats.avgLatency}</span
|
||||
>
|
||||
<span class="text-muted-foreground">Median</span><span
|
||||
class="font-mono text-foreground">{benchmarkStats.medianLatency}</span
|
||||
>
|
||||
<span class="text-muted-foreground">Min</span><span class="font-mono text-green-500"
|
||||
>{benchmarkStats.minLatency}</span
|
||||
>
|
||||
<span class="text-muted-foreground">Max</span><span class="font-mono text-red-400"
|
||||
>{benchmarkStats.maxLatency}</span
|
||||
>
|
||||
</div>
|
||||
</div>
|
||||
<div class="rounded-xl border border-border bg-card p-4">
|
||||
<div class="mb-2 text-xs font-medium text-muted-foreground">Speed (tok/s)</div>
|
||||
<div class="grid grid-cols-2 gap-y-2 text-sm">
|
||||
<span class="text-muted-foreground">Durchschnitt</span><span
|
||||
class="font-mono text-foreground">{benchmarkStats.avgTokPerSec}</span
|
||||
>
|
||||
<span class="text-muted-foreground">Median</span><span
|
||||
class="font-mono text-foreground">{benchmarkStats.medianTokPerSec}</span
|
||||
>
|
||||
<span class="text-muted-foreground">Max</span><span class="font-mono text-green-500"
|
||||
>{benchmarkStats.maxTokPerSec}</span
|
||||
>
|
||||
<span class="text-muted-foreground">Min</span><span class="font-mono text-red-400"
|
||||
>{benchmarkStats.minTokPerSec}</span
|
||||
>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="overflow-x-auto rounded-xl border border-border">
|
||||
<table class="w-full text-sm">
|
||||
<thead>
|
||||
<tr class="border-b border-border bg-card">
|
||||
<th class="px-4 py-2 text-left text-xs font-medium text-muted-foreground">Run</th>
|
||||
<th class="px-4 py-2 text-left text-xs font-medium text-muted-foreground"
|
||||
>Latenz</th
|
||||
>
|
||||
<th class="px-4 py-2 text-left text-xs font-medium text-muted-foreground"
|
||||
>tok/s</th
|
||||
>
|
||||
<th class="px-4 py-2 text-left text-xs font-medium text-muted-foreground"
|
||||
>Tokens</th
|
||||
>
|
||||
<th class="px-4 py-2 text-left text-xs font-medium text-muted-foreground"
|
||||
>Verteilung</th
|
||||
>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{#each benchmarkStats.runs as run}
|
||||
{@const maxLat = benchmarkStats.maxLatency || 1}
|
||||
<tr class="border-b border-border last:border-0">
|
||||
<td class="px-4 py-2 font-mono text-muted-foreground">#{run.iteration}</td>
|
||||
<td class="px-4 py-2 font-mono text-foreground"
|
||||
>{(run.latencyMs / 1000).toFixed(2)}s</td
|
||||
>
|
||||
<td class="px-4 py-2 font-mono text-foreground">{run.tokPerSec}</td>
|
||||
<td class="px-4 py-2 font-mono text-foreground">{run.completionTokens}</td>
|
||||
<td class="px-4 py-2"
|
||||
><div class="h-3 w-full rounded-full bg-muted">
|
||||
<div
|
||||
class="h-full rounded-full bg-primary/60"
|
||||
style="width: {Math.round((run.latencyMs / maxLat) * 100)}%"
|
||||
></div>
|
||||
</div></td
|
||||
>
|
||||
</tr>
|
||||
{/each}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
<div class="text-xs text-muted-foreground">
|
||||
Total: {benchmarkStats.totalTokens} tokens in {benchmarkStats.runs.length} runs ({modelInfo.displayName})
|
||||
</div>
|
||||
{/if}
|
||||
</div>
|
||||
{/if}
|
||||
{/if}
|
||||
</div>
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue