mirror of
https://github.com/Memo-2023/mana-monorepo.git
synced 2026-05-19 13:13:35 +02:00
feat(observability): add metrics and monitoring for all 15 backends
- Add MetricsModule to 8 backends missing it (photos, zitare, mukke, planta, picture, storage, presi, nutriphi)
- Enable Prometheus scraping for all 15 backends in prometheus.yml (was only 6, with 3 commented out and 6 missing entirely)
- Update ServiceDown alert rule to cover all 15 backends
- Update Grafana dashboards (backends, master-overview, system-overview) with all backend services in health panels
- Fix imprecise regex in application-details dashboard

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
073c216652
commit
6fa6509fa5
23 changed files with 690 additions and 472 deletions
|
|
@ -850,14 +850,14 @@
|
|||
"allValue": ".*",
|
||||
"current": { "selected": true, "text": "All", "value": "$__all" },
|
||||
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||
"definition": "label_values(up{job=~\".*backend|mana-core-auth\"}, job)",
|
||||
"definition": "label_values(up{job=~\".*-backend|mana-core-auth\"}, job)",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"multi": true,
|
||||
"name": "service",
|
||||
"options": [],
|
||||
"query": {
|
||||
"query": "label_values(up{job=~\".*backend|mana-core-auth\"}, job)",
|
||||
"query": "label_values(up{job=~\".*-backend|mana-core-auth\"}, job)",
|
||||
"refId": "PrometheusVariableQueryEditor-VariableQuery"
|
||||
},
|
||||
"refresh": 2,
|
||||
|
|
|
|||
|
|
@ -58,7 +58,7 @@
|
|||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||
"expr": "up{job=~\"mana-core-auth|chat-backend|todo-backend|calendar-backend|clock-backend|contacts-backend\"}",
|
||||
"expr": "up{job=~\"mana-core-auth|chat-backend|todo-backend|calendar-backend|clock-backend|contacts-backend|storage-backend|presi-backend|nutriphi-backend|skilltree-backend|photos-backend|zitare-backend|mukke-backend|planta-backend|picture-backend\"}",
|
||||
"legendFormat": "{{job}}",
|
||||
"refId": "A"
|
||||
}
|
||||
|
|
|
|||
|
|
@ -444,6 +444,60 @@
|
|||
"legendFormat": "Contacts",
|
||||
"refId": "F"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||
"expr": "up{job=\"storage-backend\"}",
|
||||
"legendFormat": "Storage",
|
||||
"refId": "J"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||
"expr": "up{job=\"presi-backend\"}",
|
||||
"legendFormat": "Presi",
|
||||
"refId": "K"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||
"expr": "up{job=\"nutriphi-backend\"}",
|
||||
"legendFormat": "NutriPhi",
|
||||
"refId": "L"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||
"expr": "up{job=\"skilltree-backend\"}",
|
||||
"legendFormat": "SkillTree",
|
||||
"refId": "M"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||
"expr": "up{job=\"photos-backend\"}",
|
||||
"legendFormat": "Photos",
|
||||
"refId": "N"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||
"expr": "up{job=\"zitare-backend\"}",
|
||||
"legendFormat": "Zitare",
|
||||
"refId": "O"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||
"expr": "up{job=\"mukke-backend\"}",
|
||||
"legendFormat": "Mukke",
|
||||
"refId": "P"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||
"expr": "up{job=\"planta-backend\"}",
|
||||
"legendFormat": "Planta",
|
||||
"refId": "Q"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||
"expr": "up{job=\"picture-backend\"}",
|
||||
"legendFormat": "Picture",
|
||||
"refId": "R"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||
"expr": "pg_up",
|
||||
|
|
|
|||
|
|
@ -528,6 +528,60 @@
|
|||
"legendFormat": "Contacts",
|
||||
"refId": "F"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||
"expr": "up{job=\"storage-backend\"}",
|
||||
"legendFormat": "Storage",
|
||||
"refId": "J"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||
"expr": "up{job=\"presi-backend\"}",
|
||||
"legendFormat": "Presi",
|
||||
"refId": "K"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||
"expr": "up{job=\"nutriphi-backend\"}",
|
||||
"legendFormat": "NutriPhi",
|
||||
"refId": "L"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||
"expr": "up{job=\"skilltree-backend\"}",
|
||||
"legendFormat": "SkillTree",
|
||||
"refId": "M"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||
"expr": "up{job=\"photos-backend\"}",
|
||||
"legendFormat": "Photos",
|
||||
"refId": "N"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||
"expr": "up{job=\"zitare-backend\"}",
|
||||
"legendFormat": "Zitare",
|
||||
"refId": "O"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||
"expr": "up{job=\"mukke-backend\"}",
|
||||
"legendFormat": "Mukke",
|
||||
"refId": "P"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||
"expr": "up{job=\"planta-backend\"}",
|
||||
"legendFormat": "Planta",
|
||||
"refId": "Q"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||
"expr": "up{job=\"picture-backend\"}",
|
||||
"legendFormat": "Picture",
|
||||
"refId": "R"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||
"expr": "pg_up",
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@ groups:
|
|||
rules:
|
||||
# Service Down Alert
|
||||
- alert: ServiceDown
|
||||
expr: up{job=~"mana-core-auth|chat-backend|todo-backend|calendar-backend|clock-backend|contacts-backend"} == 0
|
||||
expr: up{job=~"mana-core-auth|chat-backend|todo-backend|calendar-backend|clock-backend|contacts-backend|storage-backend|presi-backend|nutriphi-backend|skilltree-backend|photos-backend|zitare-backend|mukke-backend|planta-backend|picture-backend"} == 0
|
||||
for: 1m
|
||||
labels:
|
||||
severity: critical
|
||||
|
|
|
|||
|
|
@ -91,26 +91,68 @@ scrape_configs:
|
|||
metrics_path: '/metrics'
|
||||
scrape_interval: 30s
|
||||
|
||||
# Storage Backend (disabled - no /metrics endpoint yet)
|
||||
# - job_name: 'storage-backend'
|
||||
# static_configs:
|
||||
# - targets: ['storage-backend:3035']
|
||||
# metrics_path: '/metrics'
|
||||
# scrape_interval: 30s
|
||||
# Storage Backend
|
||||
- job_name: 'storage-backend'
|
||||
static_configs:
|
||||
- targets: ['storage-backend:3035']
|
||||
metrics_path: '/metrics'
|
||||
scrape_interval: 30s
|
||||
|
||||
# Presi Backend (disabled - no /metrics endpoint yet)
|
||||
# - job_name: 'presi-backend'
|
||||
# static_configs:
|
||||
# - targets: ['presi-backend:3036']
|
||||
# metrics_path: '/metrics'
|
||||
# scrape_interval: 30s
|
||||
# Presi Backend
|
||||
- job_name: 'presi-backend'
|
||||
static_configs:
|
||||
- targets: ['presi-backend:3036']
|
||||
metrics_path: '/metrics'
|
||||
scrape_interval: 30s
|
||||
|
||||
# Nutriphi Backend (disabled - no /metrics endpoint yet)
|
||||
# - job_name: 'nutriphi-backend'
|
||||
# static_configs:
|
||||
# - targets: ['nutriphi-backend:3037']
|
||||
# metrics_path: '/metrics'
|
||||
# scrape_interval: 30s
|
||||
# Nutriphi Backend
|
||||
- job_name: 'nutriphi-backend'
|
||||
static_configs:
|
||||
- targets: ['nutriphi-backend:3037']
|
||||
metrics_path: '/metrics'
|
||||
scrape_interval: 30s
|
||||
|
||||
# SkillTree Backend
|
||||
- job_name: 'skilltree-backend'
|
||||
static_configs:
|
||||
- targets: ['skilltree-backend:3038']
|
||||
metrics_path: '/metrics'
|
||||
scrape_interval: 30s
|
||||
|
||||
# Photos Backend
|
||||
- job_name: 'photos-backend'
|
||||
static_configs:
|
||||
- targets: ['photos-backend:3039']
|
||||
metrics_path: '/metrics'
|
||||
scrape_interval: 30s
|
||||
|
||||
# Zitare Backend
|
||||
- job_name: 'zitare-backend'
|
||||
static_configs:
|
||||
- targets: ['zitare-backend:3007']
|
||||
metrics_path: '/metrics'
|
||||
scrape_interval: 30s
|
||||
|
||||
# Mukke Backend
|
||||
- job_name: 'mukke-backend'
|
||||
static_configs:
|
||||
- targets: ['mukke-backend:3010']
|
||||
metrics_path: '/metrics'
|
||||
scrape_interval: 30s
|
||||
|
||||
# Planta Backend
|
||||
- job_name: 'planta-backend'
|
||||
static_configs:
|
||||
- targets: ['planta-backend:3022']
|
||||
metrics_path: '/metrics'
|
||||
scrape_interval: 30s
|
||||
|
||||
# Picture Backend
|
||||
- job_name: 'picture-backend'
|
||||
static_configs:
|
||||
- targets: ['picture-backend:3040']
|
||||
metrics_path: '/metrics'
|
||||
scrape_interval: 30s
|
||||
|
||||
# ============================================
|
||||
# Pushgateway (deploy metrics, batch jobs)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue