managarten/services/mana-llm/tests/test_providers.py
Till JS 3046da3b19 feat(mana-llm): M3 — health-aware router with alias + chain fallback
Replaces the old Ollama→Google special-case auto-fallback with the
unified pipeline: caller passes either a direct provider/model or an
alias from the `mana/` namespace; the router resolves to a chain and
walks it skipping unhealthy providers (per ProviderHealthCache from M2),
trying each entry, marking provider unhealthy on retryable errors and
falling through to the next.

Retryable: ConnectError, ReadTimeout, RemoteProtocolError, 5xx,
ProviderRateLimitError. Propagated (don't fall back, don't poison the
cache): ProviderCapabilityError, ProviderAuthError, ProviderBlockedError,
4xx, unknown exception types. The cache stays "what the network told us
about this provider's liveness" — caller errors don't muddy that signal.

Streaming: pre-first-byte fallback only. Once a chunk has been yielded
the provider is committed; mid-stream errors propagate as-is so we
don't splice two voices into one output.

`NoHealthyProviderError` (HTTP 503) carries a structured attempt log —
each chain entry shows up as `(model, reason)` so the cause of a 503
is visible in the response and metrics, not only in service logs.

main.py wires the lifespan: aliases.yaml is loaded, ProviderHealthCache
created, ProviderRouter takes both as constructor deps, HealthProbe
spawned with cheap HTTP probes per configured provider (Ollama
/api/tags, OpenAI-compat /v1/models with Bearer header). Google is
skipped — google-genai SDK has no obvious cheap probe; the call-site
fallback handles real errors.

22 new router tests (test_router_fallback.py): chain walking, capability
& auth propagation, 5xx vs 4xx differentiation, rate-limit retry,
all-fail → NoHealthyProviderError, direct provider strings bypass
aliases, streaming pre-first-byte fallback, mid-stream-failure does
NOT fall back, empty stream commits without retry, cache feedback on
success/failure/non-retryable. Existing test_providers.py updated for
the new constructor signature; all 99 service tests green via the dev
container (Python 3.12).

Legacy purged: `_ollama_concurrent`, `_ollama_health_cache`,
`_can_fallback_to_google`, `_should_use_ollama`, `_fallback_to_google`,
`_get_ollama_health_cached` all gone. The `auto_fallback_enabled` /
`ollama_max_concurrent` settings remain in config.py for now (M5 will
remove them along with the per-feature env-var overrides).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-26 20:44:16 +02:00

123 lines
4.3 KiB
Python

"""Provider tests."""
from pathlib import Path
import pytest
from src.aliases import AliasRegistry
from src.health import ProviderHealthCache
from src.models import ChatCompletionRequest, EmbeddingRequest, Message
from src.providers import OllamaProvider, OpenAICompatProvider, ProviderRouter
@pytest.fixture
def shipped_aliases() -> AliasRegistry:
    """Load the production aliases.yaml shipped next to the service code."""
    aliases_path = Path(__file__).resolve().parents[1] / "aliases.yaml"
    return AliasRegistry(aliases_path)
@pytest.fixture
def router(shipped_aliases: AliasRegistry) -> ProviderRouter:
    """A ProviderRouter wired with the shipped aliases and a fresh health cache."""
    return ProviderRouter(
        aliases=shipped_aliases,
        health_cache=ProviderHealthCache(),
    )
class TestProviderRouter:
    """Tests for the router's model-string parsing and direct embedding lookup."""

    def test_parse_model_with_provider(self, router: ProviderRouter) -> None:
        """An explicit 'provider/model' string splits at the first slash."""
        parsed = router._parse_model("ollama/gemma3:4b")
        assert parsed == ("ollama", "gemma3:4b")

    def test_parse_model_without_provider(self, router: ProviderRouter) -> None:
        """A bare model name is routed to Ollama (OpenAI-style compat default)."""
        parsed = router._parse_model("gemma3:4b")
        assert parsed == ("ollama", "gemma3:4b")

    def test_parse_model_openrouter(self, router: ProviderRouter) -> None:
        """Only the first slash separates provider; the rest stays in the model id."""
        parsed = router._parse_model("openrouter/meta-llama/llama-3.1-8b-instruct")
        assert parsed == ("openrouter", "meta-llama/llama-3.1-8b-instruct")

    @pytest.mark.asyncio
    async def test_embeddings_unknown_provider_raises(self, router: ProviderRouter) -> None:
        """Embeddings bypass the alias/fallback pipeline and hit the named
        provider directly, so an unconfigured provider is a config error
        that must raise loudly rather than fall back."""
        bad_request = EmbeddingRequest(model="bogus_provider/x", input="hi")
        with pytest.raises(ValueError, match="not available"):
            await router.embeddings(bad_request)
class TestOllamaProvider:
    """Tests for OllamaProvider's OpenAI→Ollama message conversion."""

    def test_convert_simple_messages(self):
        """A plain text message maps straight through to role/content."""
        provider = OllamaProvider()
        req = ChatCompletionRequest(
            model="gemma3:4b",
            messages=[Message(role="user", content="Hello")],
        )

        converted = provider._convert_messages(req)

        assert len(converted) == 1
        first = converted[0]
        assert (first["role"], first["content"]) == ("user", "Hello")

    def test_convert_multimodal_messages(self):
        """Multimodal content is flattened: text parts become `content`,
        image_url parts land in the Ollama-specific `images` list."""
        provider = OllamaProvider()
        image_part = {
            "type": "image_url",
            "image_url": {"url": "data:image/png;base64,iVBORw0KGgo="},
        }
        req = ChatCompletionRequest(
            model="llava:7b",
            messages=[
                Message(
                    role="user",
                    content=[
                        {"type": "text", "text": "What's in this image?"},
                        image_part,
                    ],
                ),
            ],
        )

        converted = provider._convert_messages(req)

        assert len(converted) == 1
        first = converted[0]
        assert first["role"] == "user"
        assert first["content"] == "What's in this image?"
        assert "images" in first
        assert len(first["images"]) == 1
class TestOpenAICompatProvider:
    """Tests for OpenAICompatProvider's message conversion."""

    def test_convert_simple_messages(self):
        """System and user text messages pass through with roles intact."""
        provider = OpenAICompatProvider(
            name="test",
            base_url="http://localhost",
            api_key="test-key",
        )
        req = ChatCompletionRequest(
            model="test-model",
            messages=[
                Message(role="system", content="You are helpful."),
                Message(role="user", content="Hello"),
            ],
        )

        converted = provider._convert_messages(req)

        assert len(converted) == 2
        assert (converted[0]["role"], converted[0]["content"]) == (
            "system",
            "You are helpful.",
        )
        assert (converted[1]["role"], converted[1]["content"]) == ("user", "Hello")