mirror of
https://github.com/Memo-2023/mana-monorepo.git
synced 2026-05-14 19:01:08 +02:00
feat(monitoring): add LLM Grafana dashboard, Prometheus scraping, and alerts
Wire mana-llm service into the monitoring stack:

Prometheus (docker/prometheus/prometheus.yml):
- Add mana-llm scrape job (port 3025, 15s interval)
- Include mana-llm in ServiceDown alert expression

Alerts (docker/prometheus/alerts.yml):
- New llm_alerts group with 4 rules:
  - LLMServiceDown: mana-llm down > 1 min (critical)
  - LLMHighErrorRate: > 10% errors for 5 min (warning)
  - OllamaProviderDown: > 50% requests via Google fallback (warning)
  - LLMSlowResponses: p95 > 30s for 5 min (warning)

Grafana Dashboard (docker/grafana/dashboards/mana-llm.json):
- 6 stat panels: status, req/min, error rate, fallback rate, latency, tokens/min
- Requests by Provider (stacked area: Ollama vs Google vs OpenRouter)
- Tokens by Type (prompt vs completion)
- Latency Percentiles (p50, p90, p99)
- Latency by Provider comparison
- Requests by Model breakdown
- Errors by Type
- Google Fallback Rate over time (with threshold coloring)
- Provider Distribution pie chart (24h)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
57a2841168
commit
169821de1a
3 changed files with 477 additions and 1 deletions
|
|
@ -3,7 +3,7 @@ groups:
|
|||
rules:
|
||||
# Service Down Alert
|
||||
- alert: ServiceDown
|
||||
expr: up{job=~"mana-core-auth|.*-backend|mana-search|mana-media|synapse"} == 0
|
||||
expr: up{job=~"mana-core-auth|.*-backend|mana-search|mana-media|mana-llm|synapse"} == 0
|
||||
for: 1m
|
||||
labels:
|
||||
severity: critical
|
||||
|
|
@ -356,3 +356,50 @@ groups:
|
|||
annotations:
|
||||
summary: "OIDC token endpoint errors"
|
||||
description: "OIDC token endpoint is returning 5xx errors. SSO may be affected."
|
||||
|
||||
- name: llm_alerts
  rules:
    # mana-llm Down: the scrape target of the mana-llm job reports up == 0.
    # NOTE(review): the ServiceDown rule's job regex in this file also appears
    # to match "mana-llm", so both alerts may fire for the same outage —
    # consider an Alertmanager inhibit rule. TODO confirm.
    - alert: LLMServiceDown
      expr: up{job="mana-llm"} == 0
      for: 1m
      labels:
        severity: critical
      annotations:
        summary: "mana-llm service is down"
        description: "mana-llm has been down for more than 1 minute. All AI features will fail."

    # High LLM Error Rate (> 10%)
    # Ratio of the error rate to the request rate over the last 5 minutes.
    # No epsilon is added to the denominator: with zero traffic the division
    # yields NaN, which the `>` comparison filters out, so the alert stays
    # silent without skewing the ratio at low request volumes.
    - alert: LLMHighErrorRate
      expr: |
        sum(rate(mana_llm_llm_errors_total[5m]))
          / sum(rate(mana_llm_llm_requests_total[5m])) > 0.1
      for: 5m
      labels:
        severity: warning
      annotations:
        summary: "High LLM error rate"
        description: "{{ $value | humanizePercentage }} of LLM requests are failing."

    # Ollama Provider Down (most requests going to the Google fallback)
    # Share of requests labelled provider="google" among all requests over the
    # last 5 minutes. The plain ratio is used on purpose: a constant added to
    # the denominator would cap the computed share below 50% whenever total
    # traffic is lower than that constant, hiding an outage at low load.
    - alert: OllamaProviderDown
      expr: |
        sum(rate(mana_llm_llm_requests_total{provider="google"}[5m]))
          / sum(rate(mana_llm_llm_requests_total[5m])) > 0.5
      for: 10m
      labels:
        severity: warning
      annotations:
        summary: "Ollama appears down — most requests going to Google fallback"
        description: "{{ $value | humanizePercentage }} of LLM requests are using Google Gemini fallback."

    # LLM Slow Responses (p95 > 30s)
    # p95 is estimated from the latency histogram buckets, aggregated across
    # all label values except `le`.
    - alert: LLMSlowResponses
      expr: |
        histogram_quantile(0.95, sum(rate(mana_llm_llm_latency_seconds_bucket[5m])) by (le)) > 30
      for: 5m
      labels:
        severity: warning
      annotations:
        summary: "LLM responses are slow"
        description: "LLM p95 latency is {{ $value | humanizeDuration }}."
|
||||
|
|
|
|||
|
|
@ -158,6 +158,13 @@ scrape_configs:
|
|||
# Core Services
|
||||
# ============================================
|
||||
|
||||
# mana-llm: LLM gateway (Ollama + Google fallback), scraped every 15s
- job_name: 'mana-llm'
  metrics_path: '/metrics'
  scrape_interval: 15s
  static_configs:
    - targets:
        - 'mana-llm:3025'
|
||||
|
||||
# Mana Search Service
|
||||
- job_name: 'mana-search'
|
||||
static_configs:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue