From 8a0bf9369987935dedded8b2e1f9497d99babe76 Mon Sep 17 00:00:00 2001
From: Till JS
Date: Thu, 16 Apr 2026 12:32:03 +0200
Subject: [PATCH] =?UTF-8?q?chore(cloud-tier):=20upgrade=20default=20model?=
 =?UTF-8?q?=20gemini-2.0-flash=20=E2=86=92=20gemini-2.5-flash?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

gemini-2.0-flash is deprecated on June 1, 2026. gemini-2.5-flash has been
stable since Q1 2026 with similar pricing ($0.15/$0.60 per 1M tokens vs
$0.10/$0.40 — pricing table already had the entry).

Three files touched:
- packages/shared-llm/src/backends/cloud.ts — client default
- services/mana-llm/src/config.py — server default
- services/mana-llm/src/providers/google.py — Ollama→Gemini fallback map
  + constructor default + deduplicated model list

Co-Authored-By: Claude Opus 4.6 (1M context)
---
 packages/shared-llm/src/backends/cloud.ts |  2 +-
 services/mana-llm/src/config.py           |  2 +-
 services/mana-llm/src/providers/google.py | 21 ++++++++++-----------
 3 files changed, 12 insertions(+), 13 deletions(-)

diff --git a/packages/shared-llm/src/backends/cloud.ts b/packages/shared-llm/src/backends/cloud.ts
index 9c5e35fd9..df7d535b4 100644
--- a/packages/shared-llm/src/backends/cloud.ts
+++ b/packages/shared-llm/src/backends/cloud.ts
@@ -27,7 +27,7 @@ export class CloudBackend implements LlmBackend {
   private readonly defaultModel: string;
 
   constructor(opts: CloudBackendOptions = {}) {
-    this.defaultModel = opts.defaultModel ?? 'google/gemini-2.0-flash';
+    this.defaultModel = opts.defaultModel ?? 'google/gemini-2.5-flash';
   }
 
   isAvailable(): boolean {
diff --git a/services/mana-llm/src/config.py b/services/mana-llm/src/config.py
index b56fd5646..b17fdc0ba 100644
--- a/services/mana-llm/src/config.py
+++ b/services/mana-llm/src/config.py
@@ -30,7 +30,7 @@ class Settings(BaseSettings):
 
     # Google Gemini (Fallback provider)
     google_api_key: str | None = None
-    google_default_model: str = "gemini-2.0-flash"
+    google_default_model: str = "gemini-2.5-flash"
 
     # Auto-fallback: Ollama → Google when Ollama is overloaded/down
     auto_fallback_enabled: bool = True
diff --git a/services/mana-llm/src/providers/google.py b/services/mana-llm/src/providers/google.py
index 2d5dfa2f7..8b15a0ede 100644
--- a/services/mana-llm/src/providers/google.py
+++ b/services/mana-llm/src/providers/google.py
@@ -29,16 +29,16 @@ logger = logging.getLogger(__name__)
 
 # Model mapping: Ollama model → Google Gemini equivalent
 OLLAMA_TO_GEMINI: dict[str, str] = {
-    "gemma3:4b": "gemini-2.0-flash",
-    "gemma3:12b": "gemini-2.0-flash",
+    "gemma3:4b": "gemini-2.5-flash",
+    "gemma3:12b": "gemini-2.5-flash",
     "gemma3:27b": "gemini-2.5-pro",
-    "llava:7b": "gemini-2.0-flash",  # Gemini has native vision
-    "qwen3-vl:4b": "gemini-2.0-flash",  # vision fallback
-    "qwen2.5-coder:7b": "gemini-2.0-flash",
+    "llava:7b": "gemini-2.5-flash",  # Gemini has native vision
+    "qwen3-vl:4b": "gemini-2.5-flash",  # vision fallback
+    "qwen2.5-coder:7b": "gemini-2.5-flash",
     "qwen2.5-coder:14b": "gemini-2.5-pro",
-    "phi3.5:latest": "gemini-2.0-flash",
-    "ministral-3:3b": "gemini-2.0-flash",
-    "deepseek-ocr:latest": "gemini-2.0-flash",
+    "phi3.5:latest": "gemini-2.5-flash",
+    "ministral-3:3b": "gemini-2.5-flash",
+    "deepseek-ocr:latest": "gemini-2.5-flash",
 }
 
 
@@ -47,7 +47,7 @@ class GoogleProvider(LLMProvider):
 
     name = "google"
 
-    def __init__(self, api_key: str, default_model: str = "gemini-2.0-flash"):
+    def __init__(self, api_key: str, default_model: str = "gemini-2.5-flash"):
         self.api_key = api_key
         self.default_model = default_model
         self.client = genai.Client(api_key=api_key)
@@ -219,9 +219,8 @@ class GoogleProvider(LLMProvider):
         """List available Google Gemini models."""
         # Return a static list of commonly used models
         return [
-            ModelInfo(id="google/gemini-2.0-flash", owned_by="google"),
-            ModelInfo(id="google/gemini-2.5-pro", owned_by="google"),
             ModelInfo(id="google/gemini-2.5-flash", owned_by="google"),
+            ModelInfo(id="google/gemini-2.5-pro", owned_by="google"),
         ]
 
     async def embeddings(