From c9e16243c878935928cc6d686856749f3c5f7cc0 Mon Sep 17 00:00:00 2001 From: Till JS Date: Thu, 9 Apr 2026 16:06:33 +0200 Subject: [PATCH] feat(shared-llm): bump mana-server default model to gemma4:e4b MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two surprises came out of "why do we still use Gemma 3 instead of 4": 1. The hardcoded default in ManaServerBackend was `gemma3:4b`, which was even smaller than mana-llm's actual server-side default of `gemma3:12b`. My initial guess from docs/LOCAL_LLM_MODELS.md was conservative. 2. The mana-llm OLLAMA_URL points at host.docker.internal:13434, which is NOT the Mac Mini's local Ollama — it's a Python TCP forwarder (~/gpu-proxy.py) that proxies to 192.168.178.11:11434 on the Windows GPU server. So title generation has been running on the RTX 3090 the whole time, not on the M4 Metal GPU. The Mac Mini's brew-installed ollama 0.15.4 wasn't even being used for inference — only as a CLI to inspect the proxied Ollama. To get to Gemma 4, both Ollama instances needed an upgrade: - Mac Mini brew : 0.15.4 → 0.20.4 (cosmetic, the binary isn't on the inference path; upgraded for consistency) - GPU server : 0.18.2 → 0.20.4 via winget. Required restarting the daemon via the OllamaServe scheduled task that was already configured. Then `ollama pull gemma4:e4b` on the GPU server (9.6 GB, ~10 min on the LAN). Verified end-to-end via the proxy with a real chat completion request to mana-llm — gemma4:e4b answered with a clean 4-word German title for a sample voice memo prompt: prompt: "Erstelle einen kurzen 3-Wort Titel für: Es ist ein schöner Tag heute am 9. 
April" → "Schöner Tag, neuntes April" Changes in this commit: packages/shared-llm/src/backends/mana-server.ts - defaultModel: 'gemma3:4b' → 'gemma4:e4b' - Updated docstring to explain why E4B is the right Mana-Server tier default: 9.6 GB on disk, 128K context, "Effective 4B" arch punches above its weight class for German prompts, and the family stays consistent with the browser tier (Gemma 4 E2B is the smaller sibling) so the source label and prompt behavior remain coherent across tiers. apps/mana/apps/web/src/lib/modules/memoro/views/DetailView.svelte - TITLE_SOURCE_LABELS map updated: browser → "Auf deinem Gerät (Gemma 4 E2B)" (was "(Gemma 4)") mana-server → "Mana-Server (Gemma 4 E4B)" (was "(gemma3:4b)") - The label now reflects that BOTH the browser and the mana-server tier are running Gemma 4 variants, which is more honest than the previous mix. Did NOT change: - The Ollama OLLAMA_DEFAULT_MODEL env var in docker-compose.macmini.yml (still gemma3:12b). That's the fallback for callers who don't specify a model in their request. Our generate-title task always sends an explicit model string, so it's unaffected. Bumping the global default is a separate decision — it would change behavior for the playground module and any other consumer that relies on the implicit fallback. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../modules/memoro/views/DetailView.svelte | 10 ++++----- .../shared-llm/src/backends/mana-server.ts | 21 +++++++++++++------ 2 files changed, 20 insertions(+), 11 deletions(-) diff --git a/apps/mana/apps/web/src/lib/modules/memoro/views/DetailView.svelte b/apps/mana/apps/web/src/lib/modules/memoro/views/DetailView.svelte index b412fedde..6ece01255 100644 --- a/apps/mana/apps/web/src/lib/modules/memoro/views/DetailView.svelte +++ b/apps/mana/apps/web/src/lib/modules/memoro/views/DetailView.svelte @@ -15,13 +15,13 @@ // Human-readable labels for the title-source badge below the title // input. 
We use these specific strings (not @mana/shared-llm's - // generic tierLabel) so we can surface the actual model name where - // known — "gemma3:4b" for mana-server, "Gemma 4" for browser tier - // — rather than the abstract tier name. + // generic tierLabel) so we can surface the actual model family + // — both browser and mana-server now run Gemma 4 variants, so the + // label stays coherent across tiers. const TITLE_SOURCE_LABELS: Record = { none: 'Lokal (regelbasiert)', - browser: 'Auf deinem Gerät (Gemma 4)', - 'mana-server': 'Mana-Server (gemma3:4b)', + browser: 'Auf deinem Gerät (Gemma 4 E2B)', + 'mana-server': 'Mana-Server (Gemma 4 E4B)', cloud: 'Google Gemini', }; diff --git a/packages/shared-llm/src/backends/mana-server.ts b/packages/shared-llm/src/backends/mana-server.ts index 652c7969d..474279e54 100644 --- a/packages/shared-llm/src/backends/mana-server.ts +++ b/packages/shared-llm/src/backends/mana-server.ts @@ -2,18 +2,27 @@ * Mana-server backend — calls services/mana-llm with an Ollama model * string. mana-llm's ProviderRouter recognizes plain Ollama model names * (no provider prefix) and routes them to the local Ollama instance on - * the Mac Mini, with automatic Gemini fallback if Ollama is overloaded. + * the Windows GPU server's RTX 3090 (reached through the Mac Mini's + * TCP proxy), with automatic Gemini fallback if Ollama is overloaded. * - * The default model is gemma3:4b — same model family as the browser - * tier (Gemma 4 E2B is the smaller sibling), so prompts behave - * consistently when a task auto-falls between tiers. + * The default model is gemma4:e4b — Google's Gemma 4 "Effective 4B" + * variant, released 2026-04-02. Same family as @mana/local-llm's + * browser tier model (Gemma 4 E2B is the smaller sibling) so prompts + * behave consistently when a task auto-falls between tiers. 
e4b is + * the right Mana-Server default because: + * - 9.6 GB on disk fits comfortably in the RTX 3090's 24 GB of VRAM + * - 128K context window covers all current title/summarize tasks + * - The "Effective 4B" architecture punches well above its weight + * class (better than gemma3:4b on most German prompts) + * - The model family we surface in the source label stays "Gemma 4" + * for both browser and mana-server, so the UX is coherent */ import type { GenerateResult, LlmBackend, LlmTaskRequest } from '../types'; import { callManaLlmStreaming, resolveLlmBaseUrl } from './remote'; export interface ManaServerBackendOptions { - /** Ollama model name to send to mana-llm. Default 'gemma3:4b'. */ + /** Ollama model name to send to mana-llm. Default 'gemma4:e4b'. */ defaultModel?: string; } @@ -22,7 +31,7 @@ export class ManaServerBackend implements LlmBackend { private readonly defaultModel: string; constructor(opts: ManaServerBackendOptions = {}) { - this.defaultModel = opts.defaultModel ?? 'gemma3:4b'; + this.defaultModel = opts.defaultModel ?? 'gemma4:e4b'; } isAvailable(): boolean {