mirror of
https://github.com/Memo-2023/mana-monorepo.git
synced 2026-05-14 22:01:09 +02:00
✨ feat(llm-playground): add model metadata system and SSD documentation
- Add MODEL_METADATA config for Ollama models with descriptions and modality - Update default model to gemma3:4b - Show model descriptions in ModelSelector and ComparisonSelector - Add docs/OLLAMA_MODELS.md with instructions for adding new models - Document external 4TB SSD setup in MAC_MINI_SERVER.md - Add gemma3:12b, gemma3:27b, qwen2.5-coder:14b to model registry
This commit is contained in:
parent
a341aa1b13
commit
213740411b
6 changed files with 310 additions and 14 deletions
|
|
@ -428,6 +428,49 @@ Ollama läuft nativ auf dem Mac Mini für lokale LLM-Inferenz (Klassifizierung,
|
|||
|
||||
- **Chip:** Apple M4 (10 Cores)
|
||||
- **RAM:** 16 GB Unified Memory
|
||||
- **Interne SSD:** 228 GB
|
||||
- **Externe SSD:** 4 TB (TillJakob-S04)
|
||||
|
||||
## Externe 4TB SSD
|
||||
|
||||
Die externe SSD wird für große Dateien verwendet, um die interne SSD zu entlasten.
|
||||
|
||||
### Mount-Punkt
|
||||
|
||||
- **Volume:** `/Volumes/TillJakob-S04`
|
||||
- **Geschwindigkeit:** ~1 GB/s (USB-C/Thunderbolt)
|
||||
|
||||
### Verzeichnisstruktur
|
||||
|
||||
```
|
||||
/Volumes/TillJakob-S04/ManaData/
|
||||
├── ollama/ # LLM Modelle (~60 GB)
|
||||
├── stt-models/ # Speech-to-Text Modelle (~19 GB)
|
||||
├── flux2/ # FLUX.2 Bildgenerierung (~15 GB)
|
||||
├── backups/ # PostgreSQL Backups
|
||||
└── docker/ # (Optional) Docker Data
|
||||
```
|
||||
|
||||
### Symlinks
|
||||
|
||||
| Symlink | Ziel (auf externer SSD) |
|
||||
|----------|---------|
|
||||
| `~/.ollama` | `/Volumes/TillJakob-S04/ManaData/ollama` |
|
||||
| `~/stt-models` | `/Volumes/TillJakob-S04/ManaData/stt-models` |
|
||||
| `~/flux2` | `/Volumes/TillJakob-S04/ManaData/flux2` |
|
||||
|
||||
### SSD prüfen
|
||||
|
||||
```bash
|
||||
# Mount-Status
|
||||
df -h /Volumes/TillJakob-S04
|
||||
|
||||
# Nutzung
|
||||
du -sh /Volumes/TillJakob-S04/ManaData/*
|
||||
|
||||
# Speed-Test
|
||||
dd if=/dev/zero of=/Volumes/TillJakob-S04/test bs=1m count=1024 && rm /Volumes/TillJakob-S04/test
|
||||
```
|
||||
|
||||
### Installation
|
||||
|
||||
|
|
@ -445,11 +488,28 @@ Optimierungen bereits aktiviert:
|
|||
- `OLLAMA_FLASH_ATTENTION=1` - Schnellere Attention-Berechnung
|
||||
- `OLLAMA_KV_CACHE_TYPE=q8_0` - Effizienterer KV-Cache
|
||||
|
||||
### Speicherort
|
||||
|
||||
Die Modelle liegen auf der externen 4TB SSD für mehr Platz:
|
||||
- **Pfad:** `/Volumes/TillJakob-S04/ManaData/ollama/models`
|
||||
- **Symlink:** `~/.ollama -> /Volumes/TillJakob-S04/ManaData/ollama`
|
||||
|
||||
### Verfügbare Modelle
|
||||
|
||||
| Modell | Größe | Zweck |
|
||||
|--------|-------|-------|
|
||||
| gemma3:4b | 3.3 GB | Klassifizierung, kurze Antworten |
|
||||
| Modell | Größe | Typ | Performance | Zweck |
|
||||
|--------|-------|-----|-------------|-------|
|
||||
| gemma3:4b | 3.3 GB | Text | ~53 t/s | Standard - schnell |
|
||||
| gemma3:12b | 8 GB | Text | ~30 t/s | Empfohlen - gute Balance |
|
||||
| gemma3:27b | 16 GB | Text | ~15 t/s | Beste Qualität |
|
||||
| phi3.5:latest | 2.2 GB | Text | ~60 t/s | Microsoft - kompakt |
|
||||
| ministral-3:3b | 3 GB | Text | ~55 t/s | Mistral Mini |
|
||||
| llava:7b | 4.7 GB | Vision | ~25 t/s | Bildverständnis |
|
||||
| qwen3-vl:4b | 3.3 GB | Vision | ~40 t/s | Vision-Language |
|
||||
| deepseek-ocr:latest | 6.7 GB | Vision | ~20 t/s | OCR & Dokumente |
|
||||
| qwen2.5-coder:7b | 4.7 GB | Code | ~35 t/s | Code-Generierung |
|
||||
| qwen2.5-coder:14b | 10 GB | Code | ~20 t/s | Erweiterte Code-Gen |
|
||||
|
||||
Siehe [OLLAMA_MODELS.md](./OLLAMA_MODELS.md) für Details zum Hinzufügen neuer Modelle.
|
||||
|
||||
```bash
|
||||
# Modelle auflisten
|
||||
|
|
|
|||
144
docs/OLLAMA_MODELS.md
Normal file
144
docs/OLLAMA_MODELS.md
Normal file
|
|
@ -0,0 +1,144 @@
|
|||
# Ollama Models Management
|
||||
|
||||
Dokumentation zum Hinzufügen und Verwalten von Ollama-Modellen auf dem Mac Mini Server.
|
||||
|
||||
## Aktuelle Modelle
|
||||
|
||||
| Modell | Größe | Typ | Performance | Beschreibung |
|
||||
|--------|-------|-----|-------------|--------------|
|
||||
| `gemma3:4b` | 3.3 GB | Text | ~53 t/s | Standard - schnell für einfache Aufgaben |
|
||||
| `gemma3:12b` | 8 GB | Text | ~30 t/s | Empfohlen - gute Balance |
|
||||
| `gemma3:27b` | 16 GB | Text | ~15 t/s | Beste Qualität, langsamer |
|
||||
| `phi3.5:latest` | 2.2 GB | Text | ~60 t/s | Microsoft - kompakt & effizient |
|
||||
| `ministral-3:3b` | 3 GB | Text | ~55 t/s | Mistral Mini - sehr schnell |
|
||||
| `llava:7b` | 4.7 GB | Vision | ~25 t/s | Bildverständnis |
|
||||
| `qwen3-vl:4b` | 3.3 GB | Vision | ~40 t/s | Qwen Vision-Language |
|
||||
| `deepseek-ocr:latest` | 6.7 GB | Vision | ~20 t/s | OCR & Dokumente |
|
||||
| `qwen2.5-coder:7b` | 4.7 GB | Code | ~35 t/s | Code-Generierung |
|
||||
| `qwen2.5-coder:14b` | 10 GB | Code | ~20 t/s | Erweiterte Code-Generierung |
|
||||
|
||||
## Neues Modell hinzufügen
|
||||
|
||||
### Schritt 1: Modell auf Server installieren
|
||||
|
||||
```bash
|
||||
# Via SSH auf Mac Mini
|
||||
ssh mana-server
|
||||
|
||||
# Modell herunterladen
|
||||
/opt/homebrew/bin/ollama pull <model-name>
|
||||
|
||||
# Beispiel:
|
||||
/opt/homebrew/bin/ollama pull gemma3:12b
|
||||
```
|
||||
|
||||
### Schritt 2: Modell im LLM Playground registrieren
|
||||
|
||||
Datei: `services/llm-playground/src/lib/stores/models.svelte.ts`
|
||||
|
||||
Füge das Modell in `MODEL_METADATA` hinzu:
|
||||
|
||||
```typescript
|
||||
export const MODEL_METADATA: Record<string, { description: string; modality: Modality }> = {
|
||||
// ... bestehende Modelle ...
|
||||
|
||||
// Neues Modell hinzufügen:
|
||||
'neues-modell:tag': {
|
||||
description: 'Kurze Beschreibung des Modells',
|
||||
modality: 'text', // oder 'vision' oder 'code'
|
||||
},
|
||||
};
|
||||
```
|
||||
|
||||
### Schritt 3: Optional - Als Standard setzen
|
||||
|
||||
Datei: `services/llm-playground/src/lib/stores/settings.svelte.ts`
|
||||
|
||||
```typescript
|
||||
const defaultSettings: Settings = {
|
||||
model: 'ollama/neues-modell:tag', // Standard-Modell ändern
|
||||
// ...
|
||||
};
|
||||
```
|
||||
|
||||
### Schritt 4: Deployment
|
||||
|
||||
```bash
|
||||
# Playground neu bauen und deployen
|
||||
pnpm --filter @mana/llm-playground build
|
||||
|
||||
# Auf Mac Mini deployen
|
||||
ssh mana-server "cd ~/projects/manacore-monorepo && docker compose -f docker-compose.macmini.yml up -d --build llm-playground"
|
||||
```
|
||||
|
||||
## Modality-Typen
|
||||
|
||||
| Typ | Beschreibung | Pattern-Erkennung |
|
||||
|-----|--------------|-------------------|
|
||||
| `text` | Standard Text-Generierung | Default |
|
||||
| `vision` | Bildverständnis (LLaVA, etc.) | `llava`, `vision`, `-vl`, `ocr`, `moondream` |
|
||||
| `code` | Code-Generierung | `coder`, `codellama`, `starcoder` |
|
||||
|
||||
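Ist ein Modell nicht in `MODEL_METADATA` eingetragen, wird die Modality automatisch anhand der oben genannten Patterns im Modellnamen erkannt (ohne Treffer: `text`). Eine Beschreibung wird in diesem Fall nicht angezeigt, da sie ausschließlich aus `MODEL_METADATA` stammt.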
|
||||
|
||||
## Server-Verwaltung
|
||||
|
||||
### Modelle auflisten
|
||||
|
||||
```bash
|
||||
ssh mana-server "/opt/homebrew/bin/ollama list"
|
||||
```
|
||||
|
||||
### Modell entfernen
|
||||
|
||||
```bash
|
||||
ssh mana-server "/opt/homebrew/bin/ollama rm <model-name>"
|
||||
```
|
||||
|
||||
### Speicherort der Modelle
|
||||
|
||||
Die Modelle liegen auf der externen 4TB SSD:
|
||||
- Pfad: `/Volumes/TillJakob-S04/ManaData/ollama/models`
|
||||
- Symlink: `~/.ollama -> /Volumes/TillJakob-S04/ManaData/ollama`
|
||||
|
||||
### Ollama Service
|
||||
|
||||
```bash
|
||||
# Status prüfen
|
||||
ssh mana-server "pgrep -x ollama && echo 'Running' || echo 'Stopped'"
|
||||
|
||||
# Neustarten
|
||||
ssh mana-server "/opt/homebrew/bin/brew services restart ollama"
|
||||
|
||||
# Logs
|
||||
ssh mana-server "tail -f /opt/homebrew/var/log/ollama.log"
|
||||
```
|
||||
|
||||
## Performance-Empfehlungen
|
||||
|
||||
### RAM-Nutzung
|
||||
|
||||
| Modell-Größe | Min. RAM | Empfohlen |
|
||||
|--------------|----------|-----------|
|
||||
| 3-4B | 4 GB | 8 GB |
|
||||
| 7-8B | 6 GB | 12 GB |
|
||||
| 12-14B | 10 GB | 16 GB |
|
||||
| 27B+ | 16 GB | 24+ GB |
|
||||
|
||||
**Mac Mini M4 hat 16 GB RAM** - das entspricht nur dem Minimum für 27B-Modelle (empfohlen: 24+ GB). Daher höchstens ein 27B-Modell und kein weiteres Modell gleichzeitig laden.
|
||||
|
||||
### Gleichzeitige Modelle
|
||||
|
||||
Ollama lädt Modelle bei Bedarf. Wenn ein neues Modell angefragt wird und nicht genug RAM frei ist, wird das vorherige entladen. Für beste Performance:
|
||||
|
||||
1. Hauptsächlich ein Modell verwenden
|
||||
2. Zwischen ähnlichen Größen wechseln
|
||||
3. Nach großen Modellen (27B) kurz warten vor kleinen Anfragen
|
||||
|
||||
## Changelog
|
||||
|
||||
| Datum | Änderung |
|
||||
|-------|----------|
|
||||
| 2026-02-01 | gemma3:12b, gemma3:27b, qwen2.5-coder:14b hinzugefügt |
|
||||
| 2026-02-01 | MODEL_METADATA System eingeführt |
|
||||
| 2026-02-01 | Standard-Modell auf gemma3:4b geändert |
|
||||
|
|
@ -42,6 +42,7 @@
|
|||
class="flex cursor-pointer items-center gap-2 rounded p-2 transition-colors hover:bg-zinc-800"
|
||||
class:opacity-50={isDisabled}
|
||||
class:cursor-not-allowed={isDisabled}
|
||||
title={model.description || ''}
|
||||
>
|
||||
<input
|
||||
type="checkbox"
|
||||
|
|
@ -50,9 +51,16 @@
|
|||
disabled={isDisabled}
|
||||
class="rounded"
|
||||
/>
|
||||
<span class="truncate text-sm" style="color: var(--color-text);">
|
||||
{getModelDisplayName(model.id)}
|
||||
</span>
|
||||
<div class="min-w-0 flex-1">
|
||||
<span class="block truncate text-sm" style="color: var(--color-text);">
|
||||
{getModelDisplayName(model.id)}
|
||||
</span>
|
||||
{#if model.description}
|
||||
<span class="block truncate text-xs" style="color: var(--color-text-muted);">
|
||||
{model.description}
|
||||
</span>
|
||||
{/if}
|
||||
</div>
|
||||
</label>
|
||||
{/each}
|
||||
</div>
|
||||
|
|
|
|||
|
|
@ -1,11 +1,19 @@
|
|||
<script lang="ts">
|
||||
import { modelsStore } from '$lib/stores/models.svelte';
|
||||
import { modelsStore, MODEL_METADATA } from '$lib/stores/models.svelte';
|
||||
import { settingsStore } from '$lib/stores/settings.svelte';
|
||||
import { onMount } from 'svelte';
|
||||
|
||||
onMount(() => {
|
||||
modelsStore.loadModels();
|
||||
});
|
||||
|
||||
// Get description for currently selected model
|
||||
const selectedModelDescription = $derived(() => {
|
||||
const modelName = settingsStore.model.includes('/')
|
||||
? settingsStore.model.split('/').slice(1).join('/')
|
||||
: settingsStore.model;
|
||||
return MODEL_METADATA[modelName]?.description;
|
||||
});
|
||||
</script>
|
||||
|
||||
<div>
|
||||
|
|
@ -48,8 +56,11 @@
|
|||
</optgroup>
|
||||
{/each}
|
||||
</select>
|
||||
<p class="mt-1.5 text-xs" style="color: var(--color-text-muted);">
|
||||
{modelsStore.models.length} models available
|
||||
</p>
|
||||
<div class="mt-1.5 text-xs" style="color: var(--color-text-muted);">
|
||||
{#if selectedModelDescription()}
|
||||
<p class="mb-0.5">{selectedModelDescription()}</p>
|
||||
{/if}
|
||||
<p>{modelsStore.models.length} models available</p>
|
||||
</div>
|
||||
{/if}
|
||||
</div>
|
||||
|
|
|
|||
|
|
@ -1,11 +1,75 @@
|
|||
import type { Model, ModelWithModality, Modality, Provider } from '$lib/types';
|
||||
import { getModels } from '$lib/api/llm';
|
||||
|
||||
// Detect modality from model ID
|
||||
/**
|
||||
* Model metadata configuration
|
||||
* Add new models here when installing them on the server
|
||||
* See: docs/OLLAMA_MODELS.md for instructions
|
||||
*/
|
||||
export const MODEL_METADATA: Record<string, { description: string; modality: Modality }> = {
|
||||
// Text Models - General Purpose
|
||||
'gemma3:4b': {
|
||||
description: 'Fast general-purpose model (~53 t/s)',
|
||||
modality: 'text',
|
||||
},
|
||||
'gemma3:12b': {
|
||||
description: 'Balanced quality & speed (~30 t/s)',
|
||||
modality: 'text',
|
||||
},
|
||||
'gemma3:27b': {
|
||||
description: 'Best quality, slower (~15 t/s)',
|
||||
modality: 'text',
|
||||
},
|
||||
'phi3.5:latest': {
|
||||
description: 'Microsoft Phi-3.5 - compact & efficient',
|
||||
modality: 'text',
|
||||
},
|
||||
'ministral-3:3b': {
|
||||
description: 'Mistral Mini - fast for simple tasks',
|
||||
modality: 'text',
|
||||
},
|
||||
|
||||
// Vision Models
|
||||
'llava:7b': {
|
||||
description: 'Image understanding & description',
|
||||
modality: 'vision',
|
||||
},
|
||||
'qwen3-vl:4b': {
|
||||
description: 'Qwen Vision-Language model',
|
||||
modality: 'vision',
|
||||
},
|
||||
'deepseek-ocr:latest': {
|
||||
description: 'OCR & document understanding',
|
||||
modality: 'vision',
|
||||
},
|
||||
|
||||
// Code Models
|
||||
'qwen2.5-coder:7b': {
|
||||
description: 'Code generation & completion (7B)',
|
||||
modality: 'code',
|
||||
},
|
||||
'qwen2.5-coder:14b': {
|
||||
description: 'Advanced code generation (14B)',
|
||||
modality: 'code',
|
||||
},
|
||||
};
|
||||
|
||||
/**
|
||||
* Detect modality from model ID
|
||||
* First checks MODEL_METADATA, then falls back to pattern matching
|
||||
*/
|
||||
function detectModality(modelId: string): Modality {
|
||||
const id = modelId.toLowerCase();
|
||||
|
||||
// Vision models
|
||||
// Extract model name from provider prefix (e.g., "ollama/gemma3:4b" -> "gemma3:4b")
|
||||
const modelName = id.includes('/') ? id.split('/').slice(1).join('/') : id;
|
||||
|
||||
// Check metadata first
|
||||
if (MODEL_METADATA[modelName]) {
|
||||
return MODEL_METADATA[modelName].modality;
|
||||
}
|
||||
|
||||
// Vision models (pattern matching fallback)
|
||||
if (
|
||||
id.includes('llava') ||
|
||||
id.includes('vision') ||
|
||||
|
|
@ -16,7 +80,7 @@ function detectModality(modelId: string): Modality {
|
|||
return 'vision';
|
||||
}
|
||||
|
||||
// Code models
|
||||
// Code models (pattern matching fallback)
|
||||
if (id.includes('coder') || id.includes('codellama') || id.includes('starcoder')) {
|
||||
return 'code';
|
||||
}
|
||||
|
|
@ -25,6 +89,14 @@ function detectModality(modelId: string): Modality {
|
|||
return 'text';
|
||||
}
|
||||
|
||||
/**
|
||||
* Get model description from metadata
|
||||
*/
|
||||
function getModelDescription(modelId: string): string | undefined {
|
||||
const modelName = modelId.includes('/') ? modelId.split('/').slice(1).join('/') : modelId;
|
||||
return MODEL_METADATA[modelName]?.description;
|
||||
}
|
||||
|
||||
interface GroupedModels {
|
||||
provider: Provider;
|
||||
label: string;
|
||||
|
|
@ -41,6 +113,7 @@ function createModelsStore() {
|
|||
models.map((model) => ({
|
||||
...model,
|
||||
modality: detectModality(model.id),
|
||||
description: getModelDescription(model.id),
|
||||
}))
|
||||
);
|
||||
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ import { browser } from '$app/environment';
|
|||
const STORAGE_KEY = 'llm-playground-settings';
|
||||
|
||||
const defaultSettings: Settings = {
|
||||
model: 'ollama/llama3.2:3b',
|
||||
model: 'ollama/gemma3:4b',
|
||||
temperature: 0.7,
|
||||
maxTokens: 2048,
|
||||
topP: 1.0,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue