mirror of
https://github.com/Memo-2023/mana-monorepo.git
synced 2026-05-14 19:01:08 +02:00
Replaces the old Ollama→Google special-case auto-fallback with the unified pipeline: caller passes either a direct provider/model or an alias from the `mana/` namespace; the router resolves to a chain and walks it skipping unhealthy providers (per ProviderHealthCache from M2), trying each entry, marking provider unhealthy on retryable errors and falling through to the next. Retryable: ConnectError, ReadTimeout, RemoteProtocolError, 5xx, ProviderRateLimitError. Propagated (don't fall back, don't poison the cache): ProviderCapabilityError, ProviderAuthError, ProviderBlockedError, 4xx, unknown exception types. The cache stays "what the network told us about this provider's liveness" — caller errors don't muddy that signal. Streaming: pre-first-byte fallback only. Once a chunk has been yielded the provider is committed; mid-stream errors propagate as-is so we don't splice two voices into one output. `NoHealthyProviderError` (HTTP 503) carries a structured attempt log — each chain entry shows up as `(model, reason)` so the cause of a 503 is visible in the response and metrics, not only in service logs. main.py wires the lifespan: aliases.yaml is loaded, ProviderHealthCache created, ProviderRouter takes both as constructor deps, HealthProbe spawned with cheap HTTP probes per configured provider (Ollama /api/tags, OpenAI-compat /v1/models with Bearer header). Google is skipped — google-genai SDK has no obvious cheap probe; the call-site fallback handles real errors. 22 new router tests (test_router_fallback.py): chain walking, capability & auth propagation, 5xx vs 4xx differentiation, rate-limit retry, all-fail → NoHealthyProviderError, direct provider strings bypass aliases, streaming pre-first-byte fallback, mid-stream-failure does NOT fall back, empty stream commits without retry, cache feedback on success/failure/non-retryable. Existing test_providers.py updated for the new constructor signature; all 99 service tests green via the dev container (Python 3.12). 
Legacy purged: `_ollama_concurrent`, `_ollama_health_cache`, `_can_fallback_to_google`, `_should_use_ollama`, `_fallback_to_google`, `_get_ollama_health_cached` all gone. The `auto_fallback_enabled` / `ollama_max_concurrent` settings remain in config.py for now (M5 will remove them along with the per-feature env-var overrides). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
123 lines
4.3 KiB
Python
123 lines
4.3 KiB
Python
"""Provider tests."""
|
|
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
from src.aliases import AliasRegistry
|
|
from src.health import ProviderHealthCache
|
|
from src.models import ChatCompletionRequest, EmbeddingRequest, Message
|
|
from src.providers import OllamaProvider, OpenAICompatProvider, ProviderRouter
|
|
|
|
|
|
@pytest.fixture
def shipped_aliases() -> AliasRegistry:
    """The repo's real aliases.yaml — same one production uses."""
    # tests/ lives one level below the repo root, next to aliases.yaml.
    repo_root = Path(__file__).resolve().parents[1]
    return AliasRegistry(repo_root / "aliases.yaml")
|
|
|
|
|
|
@pytest.fixture
def router(shipped_aliases: AliasRegistry) -> ProviderRouter:
    """A router wired to the shipped aliases and a fresh, empty health cache."""
    health_cache = ProviderHealthCache()
    return ProviderRouter(aliases=shipped_aliases, health_cache=health_cache)
|
|
|
|
|
|
class TestProviderRouter:
    """Tests for the helpers exposed by the router."""

    def test_parse_model_with_provider(self, router: ProviderRouter) -> None:
        # An explicit "provider/model" string splits on the first slash.
        parsed = router._parse_model("ollama/gemma3:4b")
        assert parsed == ("ollama", "gemma3:4b")

    def test_parse_model_without_provider(self, router: ProviderRouter) -> None:
        # Bare names default to Ollama for OpenAI-style compat.
        parsed = router._parse_model("gemma3:4b")
        assert parsed == ("ollama", "gemma3:4b")

    def test_parse_model_openrouter(self, router: ProviderRouter) -> None:
        # Only the first slash separates provider from model; the rest
        # of the string (which may itself contain slashes) is the model.
        parsed = router._parse_model("openrouter/meta-llama/llama-3.1-8b-instruct")
        assert parsed == ("openrouter", "meta-llama/llama-3.1-8b-instruct")

    @pytest.mark.asyncio
    async def test_embeddings_unknown_provider_raises(self, router: ProviderRouter) -> None:
        # Embeddings don't go through the alias/fallback pipeline — they
        # hit the requested provider directly. Asking for an unconfigured
        # one is a config error and must raise loudly.
        request = EmbeddingRequest(model="bogus_provider/x", input="hi")
        with pytest.raises(ValueError, match="not available"):
            await router.embeddings(request)
|
|
|
|
|
|
class TestOllamaProvider:
    """Test Ollama provider."""

    def test_convert_simple_messages(self):
        """Test converting simple text messages."""
        ollama = OllamaProvider()
        request = ChatCompletionRequest(
            model="gemma3:4b",
            messages=[Message(role="user", content="Hello")],
        )

        converted = ollama._convert_messages(request)
        assert converted == [{"role": "user", "content": "Hello"}]

    def test_convert_multimodal_messages(self):
        """Test converting multimodal messages."""
        ollama = OllamaProvider()
        parts = [
            {"type": "text", "text": "What's in this image?"},
            {
                "type": "image_url",
                "image_url": {"url": "data:image/png;base64,iVBORw0KGgo="},
            },
        ]
        request = ChatCompletionRequest(
            model="llava:7b",
            messages=[Message(role="user", content=parts)],
        )

        converted = ollama._convert_messages(request)
        assert len(converted) == 1
        first = converted[0]
        assert first["role"] == "user"
        # Text parts collapse to plain content; images go to a sidecar list.
        assert first["content"] == "What's in this image?"
        assert "images" in first
        assert len(first["images"]) == 1
|
|
|
|
|
|
class TestOpenAICompatProvider:
    """Test OpenAI-compatible provider."""

    def test_convert_simple_messages(self):
        """Test converting simple text messages."""
        compat = OpenAICompatProvider(
            name="test",
            base_url="http://localhost",
            api_key="test-key",
        )

        request = ChatCompletionRequest(
            model="test-model",
            messages=[
                Message(role="system", content="You are helpful."),
                Message(role="user", content="Hello"),
            ],
        )

        converted = compat._convert_messages(request)
        # OpenAI-style payloads keep role/content pairs verbatim, in order.
        assert converted == [
            {"role": "system", "content": "You are helpful."},
            {"role": "user", "content": "Hello"},
        ]