mirror of
https://github.com/Memo-2023/mana-monorepo.git
synced 2026-05-14 20:21:09 +02:00
First milestone of the LLM-fallback plan (docs/plans/llm-fallback-aliases.md). Introduces the `mana/<class>` namespace; the registry parses and validates aliases.yaml at startup and reloads on demand. Schema-rejects empty chains, missing provider prefixes, alias names outside the reserved namespace, default→unknown references, etc.

Reload semantics: a parse error keeps the previous good state in memory, so a typo + SIGHUP doesn't take the service down.

Five aliases ship with the initial config: fast-text, long-form, structured, reasoning, vision. Each chain ends with a cloud provider so the system keeps working when the GPU server is offline.

32 unit tests cover the happy path, schema validation, the namespace check, reload safety, and a guard that the shipped aliases.yaml itself parses. M2 (health-cache + probe-loop) and M3 (router fallback execution) build on this; aliases are not yet wired into the request path.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
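The reload guarantee described above (a failed parse never clobbers the last good state) can be sketched as follows. This is an illustrative Python sketch, not the real mana-llm code: `AliasRegistry`, `reload`, and the loader callable are assumed names, and YAML parsing is abstracted behind a loader so the validation logic stays in view.

```python
# Illustrative sketch (assumed names, not the real registry API) of the
# reload semantics: validate first, swap state only on success.

class AliasRegistry:
    """Holds the validated alias table; reload never clobbers good state."""

    NAMESPACE = "mana/"

    def __init__(self, loader):
        # loader: callable returning the parsed aliases.yaml as a dict.
        self._loader = loader
        # Initial load: a validation error here is fatal, by design.
        self.aliases, self.default = self._validate(loader())

    def reload(self):
        """SIGHUP handler body: on error, keep the previous good state."""
        try:
            aliases, default = self._validate(self._loader())
        except ValueError:
            return False  # previous state stays in memory
        self.aliases, self.default = aliases, default
        return True

    def _validate(self, doc):
        aliases = doc.get("aliases") or {}
        for name, spec in aliases.items():
            if not name.startswith(self.NAMESPACE):
                raise ValueError(f"alias outside reserved namespace: {name}")
            chain = spec.get("chain") or []
            if not chain:
                raise ValueError(f"empty chain: {name}")
            if any("/" not in entry for entry in chain):
                raise ValueError(f"missing provider prefix in {name}")
        default = doc.get("default")
        if default not in aliases:
            raise ValueError(f"default references unknown alias: {default}")
        return aliases, default
```

A reload that hits a schema violation (e.g. an empty chain) returns `False` and leaves `self.aliases` untouched, which is the "typo + SIGHUP doesn't take the service down" behavior.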
54 lines
1.9 KiB
YAML
# mana-llm Model Aliases — single source of truth for which class of
# model each backend feature uses.
#
# Consumers (mana-api, mana-ai, …) send `"model": "mana/<class>"` in
# their /v1/chat/completions requests; mana-llm resolves the alias to
# the chain below and tries entries in order, skipping providers that
# the health-cache has marked unhealthy.
#
# Order in `chain` = preference. First healthy entry wins. Each chain
# should end with a cloud provider so the system stays functional even
# when the local GPU server (mana-gpu, RTX 3090) is offline.
#
# Reload at runtime: `kill -HUP <pid>` after editing — no restart needed.
# Reference: docs/plans/llm-fallback-aliases.md.

aliases:
  mana/fast-text:
    description: "Short answers, classification, single-shot Q&A"
    chain:
      - ollama/qwen2.5:7b
      - groq/llama-3.1-8b-instant
      - openrouter/anthropic/claude-3-haiku

  mana/long-form:
    description: "Writing, essays, stories, longer prose"
    chain:
      - ollama/gemma3:12b
      - groq/llama-3.3-70b-versatile
      - openrouter/anthropic/claude-3.5-haiku

  mana/structured:
    description: "JSON output (comic storyboards, research subqueries, tag suggestions)"
    chain:
      - ollama/qwen2.5:7b
      - groq/llama-3.1-8b-instant
      - openrouter/openai/gpt-4o-mini

  mana/reasoning:
    description: "Agent missions, tool calls, multi-step plans"
    # Cloud first by design — local 4-7B models are unreliable for tool calls
    chain:
      - openrouter/anthropic/claude-3.5-sonnet
      - groq/llama-3.3-70b-versatile

  mana/vision:
    description: "Multimodal (image + text)"
    chain:
      - ollama/llava:7b
      - google/gemini-2.0-flash-exp
      - openrouter/openai/gpt-4o

# Default alias used when a request omits `model` or sends an unknown
# value with no provider prefix. Keep this conservative (cheap class).
default: mana/fast-text
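The "first healthy entry wins" rule from the header comments can be sketched as a small resolver. This is a hypothetical illustration, not the shipped router (which, per the commit message, does not yet wire aliases into the request path); `resolve_model` and the `is_healthy` predicate standing in for the health-cache are assumed names.

```python
# Hypothetical sketch of chain resolution: walk the chain in order and
# return the first entry whose provider the health-cache reports healthy.

def resolve_model(chain, is_healthy):
    """chain: list like ["ollama/qwen2.5:7b", "groq/llama-3.1-8b-instant"].
    is_healthy: predicate over the provider prefix (the part before "/")."""
    for entry in chain:
        provider = entry.split("/", 1)[0]
        if is_healthy(provider):
            return entry
    raise RuntimeError("no healthy provider in chain")
```

With the mana/fast-text chain and the local ollama server marked unhealthy, resolution falls through to `groq/llama-3.1-8b-instant`, which is why each chain is supposed to end with a cloud provider.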