feat(observability): add metrics and monitoring for all 15 backends

- Add MetricsModule to 8 backends missing it (photos, zitare, mukke,
  planta, picture, storage, presi, nutriphi)
- Enable Prometheus scraping for all 15 backends in prometheus.yml
  (was only 6, with 3 commented out and 6 missing entirely)
- Update ServiceDown alert rule to cover all 15 backends
- Update Grafana dashboards (backends, master-overview, system-overview)
  with all backend services in health panels
- Fix imprecise job regex in application-details dashboard (`.*backend` -> `.*-backend`)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Till JS 2026-03-23 09:09:04 +01:00
parent 073c216652
commit 6fa6509fa5
23 changed files with 690 additions and 472 deletions

View file

@ -25,6 +25,7 @@
"@manacore/shared-drizzle-config": "workspace:*",
"@manacore/shared-nestjs-auth": "workspace:*",
"@manacore/shared-nestjs-health": "workspace:*",
"@manacore/shared-nestjs-metrics": "workspace:*",
"@manacore/shared-nestjs-setup": "workspace:*",
"@manacore/shared-storage": "workspace:*",
"@mukke/shared": "workspace:*",

View file

@ -12,6 +12,7 @@ import { SongModule } from './song/song.module';
import { PlaylistModule } from './playlist/playlist.module';
import { LibraryModule } from './library/library.module';
import { HealthModule } from '@manacore/shared-nestjs-health';
import { MetricsModule } from '@manacore/shared-nestjs-metrics';
@Module({
imports: [
@ -31,6 +32,10 @@ import { HealthModule } from '@manacore/shared-nestjs-health';
PlaylistModule,
LibraryModule,
HealthModule.forRoot({ serviceName: 'mukke-backend' }),
MetricsModule.register({
prefix: 'mukke_',
excludePaths: ['/health'],
}),
],
})
export class AppModule {}

View file

@ -27,6 +27,7 @@
"@nutriphi/shared": "workspace:*",
"@manacore/shared-nestjs-auth": "workspace:*",
"@manacore/shared-nestjs-health": "workspace:*",
"@manacore/shared-nestjs-metrics": "workspace:*",
"@manacore/shared-nestjs-setup": "workspace:*",
"@google/generative-ai": "^0.21.0",
"@nestjs/common": "^10.4.15",

View file

@ -2,6 +2,7 @@ import { Module } from '@nestjs/common';
import { ConfigModule } from '@nestjs/config';
import { DatabaseModule } from './db/database.module';
import { HealthModule } from '@manacore/shared-nestjs-health';
import { MetricsModule } from '@manacore/shared-nestjs-metrics';
import { MealModule } from './meal/meal.module';
import { GoalsModule } from './goals/goals.module';
import { FavoritesModule } from './favorites/favorites.module';
@ -17,6 +18,10 @@ import { RecommendationsModule } from './recommendations/recommendations.module'
}),
DatabaseModule,
HealthModule.forRoot({ serviceName: 'nutriphi-backend' }),
MetricsModule.register({
prefix: 'nutriphi_',
excludePaths: ['/health'],
}),
MealModule,
GoalsModule,
FavoritesModule,

View file

@ -19,6 +19,7 @@
"@manacore/shared-error-tracking": "workspace:*",
"@manacore/shared-nestjs-auth": "workspace:*",
"@manacore/shared-nestjs-health": "workspace:*",
"@manacore/shared-nestjs-metrics": "workspace:*",
"@nestjs/common": "^10.4.9",
"@nestjs/config": "^3.3.0",
"@nestjs/core": "^10.4.9",

View file

@ -1,6 +1,7 @@
import { Module } from '@nestjs/common';
import { ConfigModule } from '@nestjs/config';
import { HealthModule } from '@manacore/shared-nestjs-health';
import { MetricsModule } from '@manacore/shared-nestjs-metrics';
import { DatabaseModule } from './db/database.module';
import { AlbumModule } from './album/album.module';
import { FavoriteModule } from './favorite/favorite.module';
@ -15,6 +16,10 @@ import { AdminModule } from './admin/admin.module';
envFilePath: '.env',
}),
HealthModule.forRoot({ serviceName: 'photos-backend' }),
MetricsModule.register({
prefix: 'photos_',
excludePaths: ['/health'],
}),
DatabaseModule,
AlbumModule,
FavoriteModule,

View file

@ -27,6 +27,7 @@
"@manacore/shared-errors": "workspace:*",
"@manacore/shared-nestjs-auth": "workspace:*",
"@manacore/shared-nestjs-health": "workspace:*",
"@manacore/shared-nestjs-metrics": "workspace:*",
"@manacore/shared-storage": "workspace:*",
"@nestjs/common": "^10.4.15",
"@nestjs/config": "^3.3.0",

View file

@ -4,6 +4,7 @@ import { ThrottlerModule } from '@nestjs/throttler';
import { ManaCoreModule } from '@manacore/nestjs-integration';
import { DatabaseModule } from './db/database.module';
import { HealthModule } from '@manacore/shared-nestjs-health';
import { MetricsModule } from '@manacore/shared-nestjs-metrics';
import { ModelModule } from './model/model.module';
import { TagModule } from './tag/tag.module';
import { ImageModule } from './image/image.module';
@ -35,6 +36,10 @@ import { AdminModule } from './admin/admin.module';
}),
DatabaseModule,
HealthModule.forRoot({ serviceName: 'picture-backend' }),
MetricsModule.register({
prefix: 'picture_',
excludePaths: ['/health'],
}),
ModelModule,
TagModule,
ImageModule,

View file

@ -22,6 +22,7 @@
"@google/generative-ai": "^0.21.0",
"@manacore/shared-nestjs-auth": "workspace:*",
"@manacore/shared-nestjs-health": "workspace:*",
"@manacore/shared-nestjs-metrics": "workspace:*",
"@manacore/shared-nestjs-setup": "workspace:*",
"@manacore/shared-storage": "workspace:*",
"@nestjs/common": "^10.4.15",

View file

@ -2,6 +2,7 @@ import { Module } from '@nestjs/common';
import { ConfigModule } from '@nestjs/config';
import { DatabaseModule } from './db/database.module';
import { HealthModule } from '@manacore/shared-nestjs-health';
import { MetricsModule } from '@manacore/shared-nestjs-metrics';
import { PlantModule } from './plant/plant.module';
import { PhotoModule } from './photo/photo.module';
import { AnalysisModule } from './analysis/analysis.module';
@ -15,6 +16,10 @@ import { WateringModule } from './watering/watering.module';
}),
DatabaseModule,
HealthModule.forRoot({ serviceName: 'planta-backend' }),
MetricsModule.register({
prefix: 'planta_',
excludePaths: ['/health'],
}),
PlantModule,
PhotoModule,
AnalysisModule,

View file

@ -22,6 +22,7 @@
"@manacore/shared-error-tracking": "workspace:*",
"@manacore/shared-nestjs-auth": "workspace:*",
"@manacore/shared-nestjs-health": "workspace:*",
"@manacore/shared-nestjs-metrics": "workspace:*",
"@manacore/shared-nestjs-setup": "workspace:*",
"@nestjs/common": "^10.4.15",
"@nestjs/config": "^3.3.0",

View file

@ -9,6 +9,7 @@ import { ThemeModule } from './theme/theme.module';
import { ShareModule } from './share/share.module';
import { AdminModule } from './admin/admin.module';
import { HealthModule } from '@manacore/shared-nestjs-health';
import { MetricsModule } from '@manacore/shared-nestjs-metrics';
@Module({
imports: [
@ -24,6 +25,10 @@ import { HealthModule } from '@manacore/shared-nestjs-health';
ShareModule,
AdminModule,
HealthModule.forRoot({ serviceName: 'presi-backend' }),
MetricsModule.register({
prefix: 'presi_',
excludePaths: ['/health'],
}),
],
providers: [
{

View file

@ -24,6 +24,7 @@
"@manacore/shared-error-tracking": "workspace:*",
"@manacore/shared-nestjs-auth": "workspace:*",
"@manacore/shared-nestjs-health": "workspace:*",
"@manacore/shared-nestjs-metrics": "workspace:*",
"@manacore/shared-storage": "workspace:*",
"@nestjs/common": "^10.4.15",
"@nestjs/config": "^3.3.0",

View file

@ -3,6 +3,7 @@ import { ConfigModule } from '@nestjs/config';
import { ThrottlerModule } from '@nestjs/throttler';
import { DatabaseModule } from './db/database.module';
import { HealthModule } from '@manacore/shared-nestjs-health';
import { MetricsModule } from '@manacore/shared-nestjs-metrics';
import { FileModule } from './file/file.module';
import { FolderModule } from './folder/folder.module';
import { ShareModule } from './share/share.module';
@ -25,6 +26,10 @@ import { AdminModule } from './admin/admin.module';
]),
DatabaseModule,
HealthModule.forRoot({ serviceName: 'storage-backend', route: 'api/v1/health' }),
MetricsModule.register({
prefix: 'storage_',
excludePaths: ['/health'],
}),
StorageModule,
FileModule,
FolderModule,

View file

@ -21,6 +21,7 @@
"@manacore/shared-error-tracking": "workspace:*",
"@manacore/shared-nestjs-auth": "workspace:*",
"@manacore/shared-nestjs-health": "workspace:*",
"@manacore/shared-nestjs-metrics": "workspace:*",
"@manacore/shared-nestjs-setup": "workspace:*",
"@nestjs/common": "^10.4.15",
"@nestjs/config": "^3.3.0",

View file

@ -4,6 +4,7 @@ import { DatabaseModule } from './db/database.module';
import { FavoriteModule } from './favorite/favorite.module';
import { ListModule } from './list/list.module';
import { HealthModule } from '@manacore/shared-nestjs-health';
import { MetricsModule } from '@manacore/shared-nestjs-metrics';
import { AdminModule } from './admin/admin.module';
@Module({
@ -16,6 +17,10 @@ import { AdminModule } from './admin/admin.module';
FavoriteModule,
ListModule,
HealthModule.forRoot({ serviceName: 'quote-backend' }),
MetricsModule.register({
prefix: 'zitare_',
excludePaths: ['/health'],
}),
AdminModule,
],
})

View file

@ -850,14 +850,14 @@
"allValue": ".*",
"current": { "selected": true, "text": "All", "value": "$__all" },
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"definition": "label_values(up{job=~\".*backend|mana-core-auth\"}, job)",
"definition": "label_values(up{job=~\".*-backend|mana-core-auth\"}, job)",
"hide": 0,
"includeAll": true,
"multi": true,
"name": "service",
"options": [],
"query": {
"query": "label_values(up{job=~\".*backend|mana-core-auth\"}, job)",
"query": "label_values(up{job=~\".*-backend|mana-core-auth\"}, job)",
"refId": "PrometheusVariableQueryEditor-VariableQuery"
},
"refresh": 2,

View file

@ -58,7 +58,7 @@
"targets": [
{
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"expr": "up{job=~\"mana-core-auth|chat-backend|todo-backend|calendar-backend|clock-backend|contacts-backend\"}",
"expr": "up{job=~\"mana-core-auth|chat-backend|todo-backend|calendar-backend|clock-backend|contacts-backend|storage-backend|presi-backend|nutriphi-backend|skilltree-backend|photos-backend|zitare-backend|mukke-backend|planta-backend|picture-backend\"}",
"legendFormat": "{{job}}",
"refId": "A"
}

View file

@ -444,6 +444,60 @@
"legendFormat": "Contacts",
"refId": "F"
},
{
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"expr": "up{job=\"storage-backend\"}",
"legendFormat": "Storage",
"refId": "J"
},
{
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"expr": "up{job=\"presi-backend\"}",
"legendFormat": "Presi",
"refId": "K"
},
{
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"expr": "up{job=\"nutriphi-backend\"}",
"legendFormat": "NutriPhi",
"refId": "L"
},
{
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"expr": "up{job=\"skilltree-backend\"}",
"legendFormat": "SkillTree",
"refId": "M"
},
{
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"expr": "up{job=\"photos-backend\"}",
"legendFormat": "Photos",
"refId": "N"
},
{
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"expr": "up{job=\"zitare-backend\"}",
"legendFormat": "Zitare",
"refId": "O"
},
{
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"expr": "up{job=\"mukke-backend\"}",
"legendFormat": "Mukke",
"refId": "P"
},
{
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"expr": "up{job=\"planta-backend\"}",
"legendFormat": "Planta",
"refId": "Q"
},
{
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"expr": "up{job=\"picture-backend\"}",
"legendFormat": "Picture",
"refId": "R"
},
{
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"expr": "pg_up",

View file

@ -528,6 +528,60 @@
"legendFormat": "Contacts",
"refId": "F"
},
{
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"expr": "up{job=\"storage-backend\"}",
"legendFormat": "Storage",
"refId": "J"
},
{
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"expr": "up{job=\"presi-backend\"}",
"legendFormat": "Presi",
"refId": "K"
},
{
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"expr": "up{job=\"nutriphi-backend\"}",
"legendFormat": "NutriPhi",
"refId": "L"
},
{
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"expr": "up{job=\"skilltree-backend\"}",
"legendFormat": "SkillTree",
"refId": "M"
},
{
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"expr": "up{job=\"photos-backend\"}",
"legendFormat": "Photos",
"refId": "N"
},
{
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"expr": "up{job=\"zitare-backend\"}",
"legendFormat": "Zitare",
"refId": "O"
},
{
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"expr": "up{job=\"mukke-backend\"}",
"legendFormat": "Mukke",
"refId": "P"
},
{
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"expr": "up{job=\"planta-backend\"}",
"legendFormat": "Planta",
"refId": "Q"
},
{
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"expr": "up{job=\"picture-backend\"}",
"legendFormat": "Picture",
"refId": "R"
},
{
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"expr": "pg_up",

View file

@ -3,7 +3,7 @@ groups:
rules:
# Service Down Alert
- alert: ServiceDown
expr: up{job=~"mana-core-auth|chat-backend|todo-backend|calendar-backend|clock-backend|contacts-backend"} == 0
expr: up{job=~"mana-core-auth|chat-backend|todo-backend|calendar-backend|clock-backend|contacts-backend|storage-backend|presi-backend|nutriphi-backend|skilltree-backend|photos-backend|zitare-backend|mukke-backend|planta-backend|picture-backend"} == 0
for: 1m
labels:
severity: critical

View file

@ -91,26 +91,68 @@ scrape_configs:
metrics_path: '/metrics'
scrape_interval: 30s
# Storage Backend (disabled - no /metrics endpoint yet)
# - job_name: 'storage-backend'
# static_configs:
# - targets: ['storage-backend:3035']
# metrics_path: '/metrics'
# scrape_interval: 30s
# Storage Backend
- job_name: 'storage-backend'
static_configs:
- targets: ['storage-backend:3035']
metrics_path: '/metrics'
scrape_interval: 30s
# Presi Backend (disabled - no /metrics endpoint yet)
# - job_name: 'presi-backend'
# static_configs:
# - targets: ['presi-backend:3036']
# metrics_path: '/metrics'
# scrape_interval: 30s
# Presi Backend
- job_name: 'presi-backend'
static_configs:
- targets: ['presi-backend:3036']
metrics_path: '/metrics'
scrape_interval: 30s
# Nutriphi Backend (disabled - no /metrics endpoint yet)
# - job_name: 'nutriphi-backend'
# static_configs:
# - targets: ['nutriphi-backend:3037']
# metrics_path: '/metrics'
# scrape_interval: 30s
# NutriPhi Backend
- job_name: 'nutriphi-backend'
static_configs:
- targets: ['nutriphi-backend:3037']
metrics_path: '/metrics'
scrape_interval: 30s
# SkillTree Backend
- job_name: 'skilltree-backend'
static_configs:
- targets: ['skilltree-backend:3038']
metrics_path: '/metrics'
scrape_interval: 30s
# Photos Backend
- job_name: 'photos-backend'
static_configs:
- targets: ['photos-backend:3039']
metrics_path: '/metrics'
scrape_interval: 30s
# Zitare Backend
- job_name: 'zitare-backend'
static_configs:
- targets: ['zitare-backend:3007']
metrics_path: '/metrics'
scrape_interval: 30s
# Mukke Backend
- job_name: 'mukke-backend'
static_configs:
- targets: ['mukke-backend:3010']
metrics_path: '/metrics'
scrape_interval: 30s
# Planta Backend
- job_name: 'planta-backend'
static_configs:
- targets: ['planta-backend:3022']
metrics_path: '/metrics'
scrape_interval: 30s
# Picture Backend
- job_name: 'picture-backend'
static_configs:
- targets: ['picture-backend:3040']
metrics_path: '/metrics'
scrape_interval: 30s
# ============================================
# Pushgateway (deploy metrics, batch jobs)

920
pnpm-lock.yaml generated

File diff suppressed because it is too large Load diff