diff --git a/services/mana-llm/aliases.yaml b/services/mana-llm/aliases.yaml new file mode 100644 index 000000000..f6850a748 --- /dev/null +++ b/services/mana-llm/aliases.yaml @@ -0,0 +1,54 @@ +# mana-llm Model Aliases — single source of truth for which class of +# model each backend feature uses. +# +# Consumers (mana-api, mana-ai, …) send `"model": "mana/<class>"` in +# their /v1/chat/completions requests; mana-llm resolves the alias to +# the chain below and tries entries in order, skipping providers that +# the health-cache has marked unhealthy. +# +# Order in `chain` = preference. First healthy entry wins. Each chain +# should end with a cloud provider so the system stays functional even +# when the local GPU server (mana-gpu, RTX 3090) is offline. +# +# Reload at runtime: `kill -HUP <pid>` after editing — no restart needed. +# Reference: docs/plans/llm-fallback-aliases.md. + +aliases: + mana/fast-text: + description: "Short answers, classification, single-shot Q&A" + chain: + - ollama/qwen2.5:7b + - groq/llama-3.1-8b-instant + - openrouter/anthropic/claude-3-haiku + + mana/long-form: + description: "Writing, essays, stories, longer prose" + chain: + - ollama/gemma3:12b + - groq/llama-3.3-70b-versatile + - openrouter/anthropic/claude-3.5-haiku + + mana/structured: + description: "JSON output (comic storyboards, research subqueries, tag suggestions)" + chain: + - ollama/qwen2.5:7b + - groq/llama-3.1-8b-instant + - openrouter/openai/gpt-4o-mini + + mana/reasoning: + description: "Agent missions, tool calls, multi-step plans" + # Cloud first by design — local 4-7B models are unreliable for tool calls + chain: + - openrouter/anthropic/claude-3.5-sonnet + - groq/llama-3.3-70b-versatile + + mana/vision: + description: "Multimodal (image + text)" + chain: + - ollama/llava:7b + - google/gemini-2.0-flash-exp + - openrouter/openai/gpt-4o + +# Default alias used when a request omits `model` or sends an unknown +# value with no provider prefix. Keep this conservative (cheap class). 
+default: mana/fast-text diff --git a/services/mana-llm/pyproject.toml b/services/mana-llm/pyproject.toml index 69b06bb8e..a2125b1c6 100644 --- a/services/mana-llm/pyproject.toml +++ b/services/mana-llm/pyproject.toml @@ -13,6 +13,7 @@ dependencies = [ "redis>=5.2.0", "prometheus-client>=0.21.0", "google-genai>=1.0.0", + "pyyaml>=6.0.2", ] [project.optional-dependencies] diff --git a/services/mana-llm/requirements.txt b/services/mana-llm/requirements.txt index fa65984d2..eb4686e25 100644 --- a/services/mana-llm/requirements.txt +++ b/services/mana-llm/requirements.txt @@ -19,6 +19,9 @@ google-genai>=1.0.0 # Metrics prometheus-client>=0.21.0 +# Config (alias registry) +pyyaml>=6.0.2 + # Dev pytest>=8.3.0 pytest-asyncio>=0.24.0 diff --git a/services/mana-llm/src/aliases.py b/services/mana-llm/src/aliases.py new file mode 100644 index 000000000..9e4edd26e --- /dev/null +++ b/services/mana-llm/src/aliases.py @@ -0,0 +1,223 @@ +"""Model-alias registry. + +Loads `aliases.yaml` and exposes a small API the router uses to resolve +semantic model names like ``mana/long-form`` to an ordered list of +concrete provider-prefixed model strings (``ollama/gemma3:12b`` → +``groq/llama-3.3-70b-versatile`` → …). + +The registry is hot-reloadable: ``reload()`` rebuilds the in-memory +mapping atomically. Reload errors leave the previous good state intact +so a typo in the yaml file doesn't take the service down — caller logs +the error and keeps serving. + +See docs/plans/llm-fallback-aliases.md for the full design. +""" + +from __future__ import annotations + +import logging +import threading +from dataclasses import dataclass +from pathlib import Path + +import yaml + +logger = logging.getLogger(__name__) + +# Aliases live in this namespace. Anything else passed as `model` is +# treated as a direct provider/model string (preserves the legal +# bypass-the-alias-layer escape hatch for tests/debugging). 
ALIAS_PREFIX = "mana/"


@dataclass(frozen=True)
class Alias:
    """A resolved alias entry.

    Instances are immutable: the dataclass is frozen and ``chain`` is a
    tuple, so callers cannot modify the registry's state through a
    returned ``Alias``.
    """

    # Full alias name, including the ``mana/`` prefix.
    name: str
    # Human-readable purpose; empty string when the yaml omits it.
    description: str
    # Provider-prefixed model strings, in order of preference.
    chain: tuple[str, ...]


class AliasConfigError(ValueError):
    """Raised when the YAML file is missing, unreadable, malformed, or violates the schema."""


class UnknownAliasError(KeyError):
    """Raised when a caller asks for an alias that isn't defined."""


def _validate_chain(name: str, chain: object) -> tuple[str, ...]:
    """Schema-check a single alias chain and return it as a tuple.

    Args:
        name: Alias name, used only to build error messages.
        chain: The raw ``chain`` value from the YAML document.

    Returns:
        The chain entries, whitespace-stripped, in their original order.

    Raises:
        AliasConfigError: If ``chain`` is not a non-empty list, or any entry
            is not a non-empty ``provider/model`` string.
    """
    if not isinstance(chain, list):
        raise AliasConfigError(f"alias '{name}': chain must be a list, got {type(chain).__name__}")
    if not chain:
        raise AliasConfigError(f"alias '{name}': chain must not be empty")
    out: list[str] = []
    for i, entry in enumerate(chain):
        if not isinstance(entry, str) or not entry.strip():
            raise AliasConfigError(
                f"alias '{name}': chain[{i}] must be a non-empty string, got {entry!r}"
            )
        model = entry.strip()
        # Split on the FIRST slash only: the model part may itself contain
        # slashes (e.g. "openrouter/anthropic/claude-3-haiku").
        provider, sep, model_name = model.partition("/")
        if not sep:
            raise AliasConfigError(
                f"alias '{name}': chain[{i}] = {entry!r} must include a provider prefix "
                f"(e.g. 'ollama/...', 'groq/...')"
            )
        # A bare "/model" or "provider/" contains a slash but still cannot be
        # routed, so reject it at load time as well.
        if not provider or not model_name:
            raise AliasConfigError(
                f"alias '{name}': chain[{i}] = {entry!r} must have a non-empty "
                f"provider prefix and model name"
            )
        out.append(model)
    return tuple(out)


def _validate_name(name: object) -> str:
    """Check that an alias name lives in the reserved ``mana/`` namespace.

    Args:
        name: The raw mapping key from the YAML document.

    Returns:
        ``name`` unchanged, once validated.

    Raises:
        AliasConfigError: If ``name`` is not a string starting with
            ``ALIAS_PREFIX`` followed by exactly one non-empty segment.
    """
    if not isinstance(name, str) or not name.startswith(ALIAS_PREFIX):
        raise AliasConfigError(
            f"alias name {name!r} must start with {ALIAS_PREFIX!r} (the reserved namespace)"
        )
    suffix = name[len(ALIAS_PREFIX):]
    if not suffix or "/" in suffix:
        raise AliasConfigError(
            f"alias name {name!r} must have exactly one segment after {ALIAS_PREFIX!r}"
        )
    return name


def _parse_document(doc: object) -> tuple[dict[str, Alias], str | None]:
    """Parse a loaded YAML document into a normalized ``(aliases, default)`` pair.

    Args:
        doc: The object returned by the YAML loader.

    Returns:
        A mapping of alias name to :class:`Alias`, and the ``default`` alias
        name (``None`` when the document has no ``default`` key).

    Raises:
        AliasConfigError: On any schema violation — non-mapping root, missing
            or empty ``aliases``, invalid alias name/body/description/chain,
            or a ``default`` that is not a string naming a defined alias.
    """
    if not isinstance(doc, dict):
        raise AliasConfigError(f"yaml root must be a mapping, got {type(doc).__name__}")

    raw_aliases = doc.get("aliases", {})
    if not isinstance(raw_aliases, dict):
        raise AliasConfigError(
            f"`aliases` must be a mapping, got {type(raw_aliases).__name__}"
        )
    if not raw_aliases:
        raise AliasConfigError("`aliases` is empty — at least one alias is required")

    parsed: dict[str, Alias] = {}
    for name, body in raw_aliases.items():
        validated_name = _validate_name(name)
        if not isinstance(body, dict):
            raise AliasConfigError(
                f"alias '{validated_name}': body must be a mapping, got {type(body).__name__}"
            )
        description = body.get("description", "")
        if not isinstance(description, str):
            raise AliasConfigError(
                f"alias '{validated_name}': description must be a string"
            )
        chain = _validate_chain(validated_name, body.get("chain"))
        parsed[validated_name] = Alias(
            name=validated_name,
            description=description.strip(),
            chain=chain,
        )

    default = doc.get("default")
    if default is not None:
        if not isinstance(default, str):
            raise AliasConfigError(f"`default` must be a string, got {type(default).__name__}")
        if default not in parsed:
            raise AliasConfigError(
                f"`default` references unknown alias {default!r} "
                f"(known: {sorted(parsed)})"
            )

    return parsed, default


class AliasRegistry:
    """In-memory registry of model aliases backed by a YAML file.

    Construct once at startup with the path to the yaml file. Call
    :meth:`reload` to re-read it (e.g. after a SIGHUP).

    Readers (``resolve``, ``list_aliases``, …) do not take the lock: each
    binds the current mapping to a local variable once and works from that
    reference. ``reload`` builds a brand-new mapping and rebinds the
    attributes under the lock; a published mapping is never mutated in place.
    """

    def __init__(self, path: Path | str):
        """Load the registry from ``path``.

        Raises:
            AliasConfigError: If the file is missing, unreadable, or invalid.
        """
        self._path = Path(path)
        self._lock = threading.Lock()
        self._aliases: dict[str, Alias] = {}
        self._default: str | None = None
        self._load()

    @property
    def path(self) -> Path:
        """Path of the yaml file this registry reads."""
        return self._path

    def _read_config(self) -> tuple[dict[str, Alias], str | None]:
        """Read and validate the yaml file without touching registry state.

        Every failure mode is reported as :class:`AliasConfigError`, so
        callers of :meth:`reload` only need to catch one exception type.

        Raises:
            AliasConfigError: If the file is missing or unreadable, is not
                valid YAML, or fails schema validation.
        """
        try:
            with self._path.open("r", encoding="utf-8") as f:
                doc = yaml.safe_load(f)
        except FileNotFoundError as e:
            raise AliasConfigError(f"alias config not found at {self._path}") from e
        except (OSError, UnicodeDecodeError) as e:
            raise AliasConfigError(f"failed to read {self._path}: {e}") from e
        except yaml.YAMLError as e:
            raise AliasConfigError(f"failed to parse {self._path}: {e}") from e
        return _parse_document(doc)

    def _load(self) -> None:
        """Initial load — propagates errors so a bad config fails fast at startup."""
        aliases, default = self._read_config()
        # No lock is taken here: this is only called from __init__, before
        # the instance has been returned to any caller.
        self._aliases = aliases
        self._default = default
        logger.info(
            "AliasRegistry loaded %d alias(es) from %s (default=%s)",
            len(aliases),
            self._path,
            default,
        )

    def reload(self) -> None:
        """Re-read the yaml file; on any error keep the previous state and raise.

        Designed for SIGHUP: callers should ``try/except AliasConfigError``
        and log; do not crash the service on a typo. The new document is
        fully read and validated before any attribute is rebound.

        Raises:
            AliasConfigError: If the file is missing, unreadable, or invalid.
        """
        aliases, default = self._read_config()
        with self._lock:
            self._aliases = aliases
            self._default = default
        logger.info(
            "AliasRegistry reloaded %d alias(es) from %s (default=%s)",
            len(aliases),
            self._path,
            default,
        )

    @staticmethod
    def is_alias(name: str) -> bool:
        """Cheap syntactic check — does this name live in the alias namespace?

        Static; doesn't require a registry instance. Only checks the
        ``mana/`` prefix, not whether the alias is actually defined.
        """
        return isinstance(name, str) and name.startswith(ALIAS_PREFIX)

    def resolve(self, name: str) -> Alias:
        """Look up the named alias.

        Raises:
            UnknownAliasError: If ``name`` is not defined in the registry.
        """
        # Bind once so the lookup and the "known" list in the error message
        # come from the same mapping even if reload() swaps it meanwhile.
        snapshot = self._aliases
        try:
            return snapshot[name]
        except KeyError as e:
            raise UnknownAliasError(
                f"unknown alias {name!r} (known: {sorted(snapshot)})"
            ) from e

    def resolve_chain(self, name: str) -> tuple[str, ...]:
        """Sugar for ``resolve(name).chain``."""
        return self.resolve(name).chain

    @property
    def default_alias(self) -> str | None:
        """The ``default`` alias name from the yaml file, or ``None`` if unset."""
        return self._default

    def list_aliases(self) -> list[Alias]:
        """Return all aliases as a new list, sorted by name."""
        # Bind once: iterating sorted keys of one mapping while indexing a
        # freshly swapped one could raise KeyError during a reload.
        snapshot = self._aliases
        return [snapshot[k] for k in sorted(snapshot)]
from pathlib import Path

import pytest

from src.aliases import (
    ALIAS_PREFIX,
    Alias,
    AliasConfigError,
    AliasRegistry,
    UnknownAliasError,
)

# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------


def write_yaml(tmp_path: Path, body: str, name: str = "aliases.yaml") -> Path:
    """Write ``body`` to ``tmp_path / name`` as UTF-8 and return the path."""
    p = tmp_path / name
    p.write_text(body, encoding="utf-8")
    return p


# NOTE: YAML nesting is indentation-sensitive. `description` and `chain` must
# be indented deeper than the alias key, otherwise they parse as sibling
# aliases rather than as the alias body.
VALID_CONFIG = """\
aliases:
  mana/fast-text:
    description: "fast"
    chain:
      - ollama/qwen2.5:7b
      - groq/llama-3.1-8b-instant
  mana/long-form:
    description: "long"
    chain:
      - ollama/gemma3:12b
      - groq/llama-3.3-70b-versatile
default: mana/fast-text
"""


# ---------------------------------------------------------------------------
# Construction & happy-path resolution
# ---------------------------------------------------------------------------


class TestRegistryHappyPath:
    """Loading a valid file and resolving aliases from it."""

    def test_loads_valid_yaml(self, tmp_path: Path) -> None:
        path = write_yaml(tmp_path, VALID_CONFIG)
        reg = AliasRegistry(path)
        assert reg.path == path
        assert reg.default_alias == "mana/fast-text"

    def test_resolve_returns_alias_dataclass(self, tmp_path: Path) -> None:
        reg = AliasRegistry(write_yaml(tmp_path, VALID_CONFIG))
        alias = reg.resolve("mana/long-form")
        assert isinstance(alias, Alias)
        assert alias.name == "mana/long-form"
        assert alias.description == "long"
        assert alias.chain == ("ollama/gemma3:12b", "groq/llama-3.3-70b-versatile")

    def test_resolve_chain_returns_tuple(self, tmp_path: Path) -> None:
        reg = AliasRegistry(write_yaml(tmp_path, VALID_CONFIG))
        chain = reg.resolve_chain("mana/fast-text")
        assert chain == ("ollama/qwen2.5:7b", "groq/llama-3.1-8b-instant")
        # Tuples ensure callers can't mutate the registry's internal state.
        assert isinstance(chain, tuple)

    def test_list_aliases_sorted(self, tmp_path: Path) -> None:
        reg = AliasRegistry(write_yaml(tmp_path, VALID_CONFIG))
        names = [a.name for a in reg.list_aliases()]
        assert names == sorted(names)
        assert names == ["mana/fast-text", "mana/long-form"]

    def test_unknown_alias_raises(self, tmp_path: Path) -> None:
        reg = AliasRegistry(write_yaml(tmp_path, VALID_CONFIG))
        with pytest.raises(UnknownAliasError, match="mana/nope"):
            reg.resolve("mana/nope")

    def test_default_optional(self, tmp_path: Path) -> None:
        body = (
            "aliases:\n"
            "  mana/x:\n"
            '    description: "x"\n'
            "    chain:\n"
            "      - ollama/foo:1b\n"
        )
        reg = AliasRegistry(write_yaml(tmp_path, body))
        assert reg.default_alias is None


class TestIsAlias:
    """``is_alias`` is a cheap static syntactic check used by the router."""

    @pytest.mark.parametrize(
        "name",
        ["mana/fast-text", "mana/anything", f"{ALIAS_PREFIX}foo"],
    )
    def test_recognises_alias_namespace(self, name: str) -> None:
        assert AliasRegistry.is_alias(name) is True

    @pytest.mark.parametrize(
        "name",
        ["ollama/gemma3:4b", "groq/llama", "gemma3:4b", "", "mana", "manaX/foo"],
    )
    def test_rejects_non_alias(self, name: str) -> None:
        assert AliasRegistry.is_alias(name) is False

    def test_static_no_instance_needed(self) -> None:
        # Important: callers can hit this without instantiating, so it must
        # be a free function or @staticmethod.
        assert AliasRegistry.is_alias("mana/x") is True


# ---------------------------------------------------------------------------
# Schema validation — the YAML is user-edited, must fail loudly on typos
# ---------------------------------------------------------------------------


class TestSchemaValidation:
    """Each malformed document must raise ``AliasConfigError`` at load time."""

    def test_missing_file_raises(self, tmp_path: Path) -> None:
        with pytest.raises(AliasConfigError, match="not found"):
            AliasRegistry(tmp_path / "absent.yaml")

    def test_invalid_yaml_raises(self, tmp_path: Path) -> None:
        path = write_yaml(tmp_path, "aliases: [\n unclosed")
        with pytest.raises(AliasConfigError, match="failed to parse"):
            AliasRegistry(path)

    def test_root_not_a_mapping(self, tmp_path: Path) -> None:
        path = write_yaml(tmp_path, "- just-a-list\n")
        with pytest.raises(AliasConfigError, match="root must be a mapping"):
            AliasRegistry(path)

    def test_aliases_must_be_mapping(self, tmp_path: Path) -> None:
        path = write_yaml(tmp_path, "aliases: just-a-string\n")
        with pytest.raises(AliasConfigError, match="`aliases` must be a mapping"):
            AliasRegistry(path)

    def test_empty_aliases_rejected(self, tmp_path: Path) -> None:
        path = write_yaml(tmp_path, "aliases: {}\n")
        with pytest.raises(AliasConfigError, match="empty"):
            AliasRegistry(path)

    def test_alias_name_must_use_mana_namespace(self, tmp_path: Path) -> None:
        body = (
            "aliases:\n"
            "  fast-text:\n"
            '    description: "x"\n'
            "    chain:\n"
            "      - ollama/foo:1b\n"
        )
        path = write_yaml(tmp_path, body)
        with pytest.raises(AliasConfigError, match="mana/"):
            AliasRegistry(path)

    def test_alias_name_must_have_one_segment(self, tmp_path: Path) -> None:
        body = (
            "aliases:\n"
            "  mana/foo/bar:\n"
            '    description: "x"\n'
            "    chain:\n"
            "      - ollama/foo:1b\n"
        )
        path = write_yaml(tmp_path, body)
        with pytest.raises(AliasConfigError, match="exactly one segment"):
            AliasRegistry(path)

    def test_chain_must_be_list(self, tmp_path: Path) -> None:
        body = (
            "aliases:\n"
            "  mana/x:\n"
            '    description: "x"\n'
            '    chain: "ollama/gemma3:4b"\n'
        )
        path = write_yaml(tmp_path, body)
        with pytest.raises(AliasConfigError, match="chain must be a list"):
            AliasRegistry(path)

    def test_empty_chain_rejected(self, tmp_path: Path) -> None:
        body = "aliases:\n  mana/x:\n    chain: []\n"
        path = write_yaml(tmp_path, body)
        with pytest.raises(AliasConfigError, match="must not be empty"):
            AliasRegistry(path)

    def test_chain_entry_without_provider_prefix_rejected(self, tmp_path: Path) -> None:
        # "gemma3:4b" without a provider/ prefix would silently default to
        # ollama and confuse the health-cache; reject loudly at config-load.
        body = "aliases:\n  mana/x:\n    chain:\n      - gemma3:4b\n"
        path = write_yaml(tmp_path, body)
        with pytest.raises(AliasConfigError, match="provider prefix"):
            AliasRegistry(path)

    def test_chain_entry_must_be_string(self, tmp_path: Path) -> None:
        body = "aliases:\n  mana/x:\n    chain:\n      - 42\n"
        path = write_yaml(tmp_path, body)
        with pytest.raises(AliasConfigError):
            AliasRegistry(path)

    def test_default_must_reference_known_alias(self, tmp_path: Path) -> None:
        body = (
            "aliases:\n"
            "  mana/x:\n"
            '    description: "x"\n'
            "    chain:\n"
            "      - ollama/foo:1b\n"
            "default: mana/missing\n"
        )
        path = write_yaml(tmp_path, body)
        with pytest.raises(AliasConfigError, match="references unknown alias"):
            AliasRegistry(path)


# ---------------------------------------------------------------------------
# Reload semantics — SIGHUP should be safe even with typos
# ---------------------------------------------------------------------------


class TestReload:
    """``reload`` swaps state on success and leaves it untouched on failure."""

    def test_reload_picks_up_edits(self, tmp_path: Path) -> None:
        path = write_yaml(tmp_path, VALID_CONFIG)
        reg = AliasRegistry(path)
        assert reg.resolve_chain("mana/long-form") == (
            "ollama/gemma3:12b",
            "groq/llama-3.3-70b-versatile",
        )

        # Edit on disk: shrink the long-form chain.
        new_body = (
            "aliases:\n"
            "  mana/long-form:\n"
            '    description: "shorter"\n'
            "    chain:\n"
            "      - groq/llama-3.3-70b-versatile\n"
            "default: mana/long-form\n"
        )
        path.write_text(new_body, encoding="utf-8")
        reg.reload()

        assert reg.resolve_chain("mana/long-form") == ("groq/llama-3.3-70b-versatile",)
        assert reg.default_alias == "mana/long-form"
        # Aliases that disappeared from the new file are gone.
        with pytest.raises(UnknownAliasError):
            reg.resolve("mana/fast-text")

    def test_reload_keeps_old_state_on_parse_error(self, tmp_path: Path) -> None:
        path = write_yaml(tmp_path, VALID_CONFIG)
        reg = AliasRegistry(path)
        # First reload fine — establish a baseline.
        reg.reload()

        # Now break the file with an obviously invalid yaml.
        path.write_text("aliases: [unclosed\n", encoding="utf-8")
        with pytest.raises(AliasConfigError):
            reg.reload()

        # The previous good state must still be queryable — service stays up.
        assert reg.resolve_chain("mana/fast-text") == (
            "ollama/qwen2.5:7b",
            "groq/llama-3.1-8b-instant",
        )
        assert reg.default_alias == "mana/fast-text"

    def test_reload_keeps_old_state_on_schema_error(self, tmp_path: Path) -> None:
        path = write_yaml(tmp_path, VALID_CONFIG)
        reg = AliasRegistry(path)

        # Empty aliases — would be rejected on first load, must also be
        # rejected here without nuking the in-memory state.
        path.write_text("aliases: {}\n", encoding="utf-8")
        with pytest.raises(AliasConfigError):
            reg.reload()

        assert "mana/fast-text" in [a.name for a in reg.list_aliases()]


# ---------------------------------------------------------------------------
# Repo-shipped aliases.yaml is itself valid
# ---------------------------------------------------------------------------


class TestShippedConfig:
    """Guards the yaml file that is checked into the repository."""

    def test_repo_aliases_yaml_loads(self) -> None:
        # The yaml file checked into services/mana-llm/aliases.yaml is the
        # one that runs in production. It must always parse cleanly — this
        # test catches editor accidents before they ship.
        repo_yaml = Path(__file__).resolve().parents[1] / "aliases.yaml"
        assert repo_yaml.exists(), f"shipped config missing at {repo_yaml}"
        reg = AliasRegistry(repo_yaml)
        # Sanity: the five classes the plan calls out must exist.
        for expected in (
            "mana/fast-text",
            "mana/long-form",
            "mana/structured",
            "mana/reasoning",
            "mana/vision",
        ):
            reg.resolve(expected)