mirror of
https://github.com/Memo-2023/mana-monorepo.git
synced 2026-05-15 05:21:10 +02:00
feat(observability): add metrics and monitoring for all 15 backends
- Add MetricsModule to 8 backends missing it (photos, zitare, mukke, planta, picture, storage, presi, nutriphi)
- Enable Prometheus scraping for all 15 backends in prometheus.yml (was only 6, with 3 commented out and 6 missing entirely)
- Update ServiceDown alert rule to cover all 15 backends
- Update Grafana dashboards (backends, master-overview, system-overview) with all backend services in health panels
- Fix imprecise regex in application-details dashboard

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
073c216652
commit
6fa6509fa5
23 changed files with 690 additions and 472 deletions
|
|
@ -25,6 +25,7 @@
|
|||
"@manacore/shared-drizzle-config": "workspace:*",
|
||||
"@manacore/shared-nestjs-auth": "workspace:*",
|
||||
"@manacore/shared-nestjs-health": "workspace:*",
|
||||
"@manacore/shared-nestjs-metrics": "workspace:*",
|
||||
"@manacore/shared-nestjs-setup": "workspace:*",
|
||||
"@manacore/shared-storage": "workspace:*",
|
||||
"@mukke/shared": "workspace:*",
|
||||
|
|
|
|||
|
|
@ -12,6 +12,7 @@ import { SongModule } from './song/song.module';
|
|||
import { PlaylistModule } from './playlist/playlist.module';
|
||||
import { LibraryModule } from './library/library.module';
|
||||
import { HealthModule } from '@manacore/shared-nestjs-health';
|
||||
import { MetricsModule } from '@manacore/shared-nestjs-metrics';
|
||||
|
||||
@Module({
|
||||
imports: [
|
||||
|
|
@ -31,6 +32,10 @@ import { HealthModule } from '@manacore/shared-nestjs-health';
|
|||
PlaylistModule,
|
||||
LibraryModule,
|
||||
HealthModule.forRoot({ serviceName: 'mukke-backend' }),
|
||||
MetricsModule.register({
|
||||
prefix: 'mukke_',
|
||||
excludePaths: ['/health'],
|
||||
}),
|
||||
],
|
||||
})
|
||||
export class AppModule {}
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@
|
|||
"@nutriphi/shared": "workspace:*",
|
||||
"@manacore/shared-nestjs-auth": "workspace:*",
|
||||
"@manacore/shared-nestjs-health": "workspace:*",
|
||||
"@manacore/shared-nestjs-metrics": "workspace:*",
|
||||
"@manacore/shared-nestjs-setup": "workspace:*",
|
||||
"@google/generative-ai": "^0.21.0",
|
||||
"@nestjs/common": "^10.4.15",
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ import { Module } from '@nestjs/common';
|
|||
import { ConfigModule } from '@nestjs/config';
|
||||
import { DatabaseModule } from './db/database.module';
|
||||
import { HealthModule } from '@manacore/shared-nestjs-health';
|
||||
import { MetricsModule } from '@manacore/shared-nestjs-metrics';
|
||||
import { MealModule } from './meal/meal.module';
|
||||
import { GoalsModule } from './goals/goals.module';
|
||||
import { FavoritesModule } from './favorites/favorites.module';
|
||||
|
|
@ -17,6 +18,10 @@ import { RecommendationsModule } from './recommendations/recommendations.module'
|
|||
}),
|
||||
DatabaseModule,
|
||||
HealthModule.forRoot({ serviceName: 'nutriphi-backend' }),
|
||||
MetricsModule.register({
|
||||
prefix: 'nutriphi_',
|
||||
excludePaths: ['/health'],
|
||||
}),
|
||||
MealModule,
|
||||
GoalsModule,
|
||||
FavoritesModule,
|
||||
|
|
|
|||
|
|
@ -19,6 +19,7 @@
|
|||
"@manacore/shared-error-tracking": "workspace:*",
|
||||
"@manacore/shared-nestjs-auth": "workspace:*",
|
||||
"@manacore/shared-nestjs-health": "workspace:*",
|
||||
"@manacore/shared-nestjs-metrics": "workspace:*",
|
||||
"@nestjs/common": "^10.4.9",
|
||||
"@nestjs/config": "^3.3.0",
|
||||
"@nestjs/core": "^10.4.9",
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
import { Module } from '@nestjs/common';
|
||||
import { ConfigModule } from '@nestjs/config';
|
||||
import { HealthModule } from '@manacore/shared-nestjs-health';
|
||||
import { MetricsModule } from '@manacore/shared-nestjs-metrics';
|
||||
import { DatabaseModule } from './db/database.module';
|
||||
import { AlbumModule } from './album/album.module';
|
||||
import { FavoriteModule } from './favorite/favorite.module';
|
||||
|
|
@ -15,6 +16,10 @@ import { AdminModule } from './admin/admin.module';
|
|||
envFilePath: '.env',
|
||||
}),
|
||||
HealthModule.forRoot({ serviceName: 'photos-backend' }),
|
||||
MetricsModule.register({
|
||||
prefix: 'photos_',
|
||||
excludePaths: ['/health'],
|
||||
}),
|
||||
DatabaseModule,
|
||||
AlbumModule,
|
||||
FavoriteModule,
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@
|
|||
"@manacore/shared-errors": "workspace:*",
|
||||
"@manacore/shared-nestjs-auth": "workspace:*",
|
||||
"@manacore/shared-nestjs-health": "workspace:*",
|
||||
"@manacore/shared-nestjs-metrics": "workspace:*",
|
||||
"@manacore/shared-storage": "workspace:*",
|
||||
"@nestjs/common": "^10.4.15",
|
||||
"@nestjs/config": "^3.3.0",
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ import { ThrottlerModule } from '@nestjs/throttler';
|
|||
import { ManaCoreModule } from '@manacore/nestjs-integration';
|
||||
import { DatabaseModule } from './db/database.module';
|
||||
import { HealthModule } from '@manacore/shared-nestjs-health';
|
||||
import { MetricsModule } from '@manacore/shared-nestjs-metrics';
|
||||
import { ModelModule } from './model/model.module';
|
||||
import { TagModule } from './tag/tag.module';
|
||||
import { ImageModule } from './image/image.module';
|
||||
|
|
@ -35,6 +36,10 @@ import { AdminModule } from './admin/admin.module';
|
|||
}),
|
||||
DatabaseModule,
|
||||
HealthModule.forRoot({ serviceName: 'picture-backend' }),
|
||||
MetricsModule.register({
|
||||
prefix: 'picture_',
|
||||
excludePaths: ['/health'],
|
||||
}),
|
||||
ModelModule,
|
||||
TagModule,
|
||||
ImageModule,
|
||||
|
|
|
|||
|
|
@ -22,6 +22,7 @@
|
|||
"@google/generative-ai": "^0.21.0",
|
||||
"@manacore/shared-nestjs-auth": "workspace:*",
|
||||
"@manacore/shared-nestjs-health": "workspace:*",
|
||||
"@manacore/shared-nestjs-metrics": "workspace:*",
|
||||
"@manacore/shared-nestjs-setup": "workspace:*",
|
||||
"@manacore/shared-storage": "workspace:*",
|
||||
"@nestjs/common": "^10.4.15",
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ import { Module } from '@nestjs/common';
|
|||
import { ConfigModule } from '@nestjs/config';
|
||||
import { DatabaseModule } from './db/database.module';
|
||||
import { HealthModule } from '@manacore/shared-nestjs-health';
|
||||
import { MetricsModule } from '@manacore/shared-nestjs-metrics';
|
||||
import { PlantModule } from './plant/plant.module';
|
||||
import { PhotoModule } from './photo/photo.module';
|
||||
import { AnalysisModule } from './analysis/analysis.module';
|
||||
|
|
@ -15,6 +16,10 @@ import { WateringModule } from './watering/watering.module';
|
|||
}),
|
||||
DatabaseModule,
|
||||
HealthModule.forRoot({ serviceName: 'planta-backend' }),
|
||||
MetricsModule.register({
|
||||
prefix: 'planta_',
|
||||
excludePaths: ['/health'],
|
||||
}),
|
||||
PlantModule,
|
||||
PhotoModule,
|
||||
AnalysisModule,
|
||||
|
|
|
|||
|
|
@ -22,6 +22,7 @@
|
|||
"@manacore/shared-error-tracking": "workspace:*",
|
||||
"@manacore/shared-nestjs-auth": "workspace:*",
|
||||
"@manacore/shared-nestjs-health": "workspace:*",
|
||||
"@manacore/shared-nestjs-metrics": "workspace:*",
|
||||
"@manacore/shared-nestjs-setup": "workspace:*",
|
||||
"@nestjs/common": "^10.4.15",
|
||||
"@nestjs/config": "^3.3.0",
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@ import { ThemeModule } from './theme/theme.module';
|
|||
import { ShareModule } from './share/share.module';
|
||||
import { AdminModule } from './admin/admin.module';
|
||||
import { HealthModule } from '@manacore/shared-nestjs-health';
|
||||
import { MetricsModule } from '@manacore/shared-nestjs-metrics';
|
||||
|
||||
@Module({
|
||||
imports: [
|
||||
|
|
@ -24,6 +25,10 @@ import { HealthModule } from '@manacore/shared-nestjs-health';
|
|||
ShareModule,
|
||||
AdminModule,
|
||||
HealthModule.forRoot({ serviceName: 'presi-backend' }),
|
||||
MetricsModule.register({
|
||||
prefix: 'presi_',
|
||||
excludePaths: ['/health'],
|
||||
}),
|
||||
],
|
||||
providers: [
|
||||
{
|
||||
|
|
|
|||
|
|
@ -24,6 +24,7 @@
|
|||
"@manacore/shared-error-tracking": "workspace:*",
|
||||
"@manacore/shared-nestjs-auth": "workspace:*",
|
||||
"@manacore/shared-nestjs-health": "workspace:*",
|
||||
"@manacore/shared-nestjs-metrics": "workspace:*",
|
||||
"@manacore/shared-storage": "workspace:*",
|
||||
"@nestjs/common": "^10.4.15",
|
||||
"@nestjs/config": "^3.3.0",
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ import { ConfigModule } from '@nestjs/config';
|
|||
import { ThrottlerModule } from '@nestjs/throttler';
|
||||
import { DatabaseModule } from './db/database.module';
|
||||
import { HealthModule } from '@manacore/shared-nestjs-health';
|
||||
import { MetricsModule } from '@manacore/shared-nestjs-metrics';
|
||||
import { FileModule } from './file/file.module';
|
||||
import { FolderModule } from './folder/folder.module';
|
||||
import { ShareModule } from './share/share.module';
|
||||
|
|
@ -25,6 +26,10 @@ import { AdminModule } from './admin/admin.module';
|
|||
]),
|
||||
DatabaseModule,
|
||||
HealthModule.forRoot({ serviceName: 'storage-backend', route: 'api/v1/health' }),
|
||||
MetricsModule.register({
|
||||
prefix: 'storage_',
|
||||
excludePaths: ['/health'],
|
||||
}),
|
||||
StorageModule,
|
||||
FileModule,
|
||||
FolderModule,
|
||||
|
|
|
|||
|
|
@ -21,6 +21,7 @@
|
|||
"@manacore/shared-error-tracking": "workspace:*",
|
||||
"@manacore/shared-nestjs-auth": "workspace:*",
|
||||
"@manacore/shared-nestjs-health": "workspace:*",
|
||||
"@manacore/shared-nestjs-metrics": "workspace:*",
|
||||
"@manacore/shared-nestjs-setup": "workspace:*",
|
||||
"@nestjs/common": "^10.4.15",
|
||||
"@nestjs/config": "^3.3.0",
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ import { DatabaseModule } from './db/database.module';
|
|||
import { FavoriteModule } from './favorite/favorite.module';
|
||||
import { ListModule } from './list/list.module';
|
||||
import { HealthModule } from '@manacore/shared-nestjs-health';
|
||||
import { MetricsModule } from '@manacore/shared-nestjs-metrics';
|
||||
import { AdminModule } from './admin/admin.module';
|
||||
|
||||
@Module({
|
||||
|
|
@ -16,6 +17,10 @@ import { AdminModule } from './admin/admin.module';
|
|||
FavoriteModule,
|
||||
ListModule,
|
||||
HealthModule.forRoot({ serviceName: 'quote-backend' }),
|
||||
MetricsModule.register({
|
||||
prefix: 'zitare_',
|
||||
excludePaths: ['/health'],
|
||||
}),
|
||||
AdminModule,
|
||||
],
|
||||
})
|
||||
|
|
|
|||
|
|
@ -850,14 +850,14 @@
|
|||
"allValue": ".*",
|
||||
"current": { "selected": true, "text": "All", "value": "$__all" },
|
||||
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||
"definition": "label_values(up{job=~\".*backend|mana-core-auth\"}, job)",
|
||||
"definition": "label_values(up{job=~\".*-backend|mana-core-auth\"}, job)",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"multi": true,
|
||||
"name": "service",
|
||||
"options": [],
|
||||
"query": {
|
||||
"query": "label_values(up{job=~\".*backend|mana-core-auth\"}, job)",
|
||||
"query": "label_values(up{job=~\".*-backend|mana-core-auth\"}, job)",
|
||||
"refId": "PrometheusVariableQueryEditor-VariableQuery"
|
||||
},
|
||||
"refresh": 2,
|
||||
|
|
|
|||
|
|
@ -58,7 +58,7 @@
|
|||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||
"expr": "up{job=~\"mana-core-auth|chat-backend|todo-backend|calendar-backend|clock-backend|contacts-backend\"}",
|
||||
"expr": "up{job=~\"mana-core-auth|chat-backend|todo-backend|calendar-backend|clock-backend|contacts-backend|storage-backend|presi-backend|nutriphi-backend|skilltree-backend|photos-backend|zitare-backend|mukke-backend|planta-backend|picture-backend\"}",
|
||||
"legendFormat": "{{job}}",
|
||||
"refId": "A"
|
||||
}
|
||||
|
|
|
|||
|
|
@ -444,6 +444,60 @@
|
|||
"legendFormat": "Contacts",
|
||||
"refId": "F"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||
"expr": "up{job=\"storage-backend\"}",
|
||||
"legendFormat": "Storage",
|
||||
"refId": "J"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||
"expr": "up{job=\"presi-backend\"}",
|
||||
"legendFormat": "Presi",
|
||||
"refId": "K"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||
"expr": "up{job=\"nutriphi-backend\"}",
|
||||
"legendFormat": "NutriPhi",
|
||||
"refId": "L"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||
"expr": "up{job=\"skilltree-backend\"}",
|
||||
"legendFormat": "SkillTree",
|
||||
"refId": "M"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||
"expr": "up{job=\"photos-backend\"}",
|
||||
"legendFormat": "Photos",
|
||||
"refId": "N"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||
"expr": "up{job=\"zitare-backend\"}",
|
||||
"legendFormat": "Zitare",
|
||||
"refId": "O"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||
"expr": "up{job=\"mukke-backend\"}",
|
||||
"legendFormat": "Mukke",
|
||||
"refId": "P"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||
"expr": "up{job=\"planta-backend\"}",
|
||||
"legendFormat": "Planta",
|
||||
"refId": "Q"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||
"expr": "up{job=\"picture-backend\"}",
|
||||
"legendFormat": "Picture",
|
||||
"refId": "R"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||
"expr": "pg_up",
|
||||
|
|
|
|||
|
|
@ -528,6 +528,60 @@
|
|||
"legendFormat": "Contacts",
|
||||
"refId": "F"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||
"expr": "up{job=\"storage-backend\"}",
|
||||
"legendFormat": "Storage",
|
||||
"refId": "J"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||
"expr": "up{job=\"presi-backend\"}",
|
||||
"legendFormat": "Presi",
|
||||
"refId": "K"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||
"expr": "up{job=\"nutriphi-backend\"}",
|
||||
"legendFormat": "NutriPhi",
|
||||
"refId": "L"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||
"expr": "up{job=\"skilltree-backend\"}",
|
||||
"legendFormat": "SkillTree",
|
||||
"refId": "M"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||
"expr": "up{job=\"photos-backend\"}",
|
||||
"legendFormat": "Photos",
|
||||
"refId": "N"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||
"expr": "up{job=\"zitare-backend\"}",
|
||||
"legendFormat": "Zitare",
|
||||
"refId": "O"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||
"expr": "up{job=\"mukke-backend\"}",
|
||||
"legendFormat": "Mukke",
|
||||
"refId": "P"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||
"expr": "up{job=\"planta-backend\"}",
|
||||
"legendFormat": "Planta",
|
||||
"refId": "Q"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||
"expr": "up{job=\"picture-backend\"}",
|
||||
"legendFormat": "Picture",
|
||||
"refId": "R"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||
"expr": "pg_up",
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@ groups:
|
|||
rules:
|
||||
# Service Down Alert
|
||||
- alert: ServiceDown
|
||||
expr: up{job=~"mana-core-auth|chat-backend|todo-backend|calendar-backend|clock-backend|contacts-backend"} == 0
|
||||
expr: up{job=~"mana-core-auth|chat-backend|todo-backend|calendar-backend|clock-backend|contacts-backend|storage-backend|presi-backend|nutriphi-backend|skilltree-backend|photos-backend|zitare-backend|mukke-backend|planta-backend|picture-backend"} == 0
|
||||
for: 1m
|
||||
labels:
|
||||
severity: critical
|
||||
|
|
|
|||
|
|
@ -91,26 +91,68 @@ scrape_configs:
|
|||
metrics_path: '/metrics'
|
||||
scrape_interval: 30s
|
||||
|
||||
# Storage Backend (disabled - no /metrics endpoint yet)
|
||||
# - job_name: 'storage-backend'
|
||||
# static_configs:
|
||||
# - targets: ['storage-backend:3035']
|
||||
# metrics_path: '/metrics'
|
||||
# scrape_interval: 30s
|
||||
# Storage Backend
|
||||
- job_name: 'storage-backend'
|
||||
static_configs:
|
||||
- targets: ['storage-backend:3035']
|
||||
metrics_path: '/metrics'
|
||||
scrape_interval: 30s
|
||||
|
||||
# Presi Backend (disabled - no /metrics endpoint yet)
|
||||
# - job_name: 'presi-backend'
|
||||
# static_configs:
|
||||
# - targets: ['presi-backend:3036']
|
||||
# metrics_path: '/metrics'
|
||||
# scrape_interval: 30s
|
||||
# Presi Backend
|
||||
- job_name: 'presi-backend'
|
||||
static_configs:
|
||||
- targets: ['presi-backend:3036']
|
||||
metrics_path: '/metrics'
|
||||
scrape_interval: 30s
|
||||
|
||||
# Nutriphi Backend (disabled - no /metrics endpoint yet)
|
||||
# - job_name: 'nutriphi-backend'
|
||||
# static_configs:
|
||||
# - targets: ['nutriphi-backend:3037']
|
||||
# metrics_path: '/metrics'
|
||||
# scrape_interval: 30s
|
||||
# Nutriphi Backend
|
||||
- job_name: 'nutriphi-backend'
|
||||
static_configs:
|
||||
- targets: ['nutriphi-backend:3037']
|
||||
metrics_path: '/metrics'
|
||||
scrape_interval: 30s
|
||||
|
||||
# SkillTree Backend
|
||||
- job_name: 'skilltree-backend'
|
||||
static_configs:
|
||||
- targets: ['skilltree-backend:3038']
|
||||
metrics_path: '/metrics'
|
||||
scrape_interval: 30s
|
||||
|
||||
# Photos Backend
|
||||
- job_name: 'photos-backend'
|
||||
static_configs:
|
||||
- targets: ['photos-backend:3039']
|
||||
metrics_path: '/metrics'
|
||||
scrape_interval: 30s
|
||||
|
||||
# Zitare Backend
|
||||
- job_name: 'zitare-backend'
|
||||
static_configs:
|
||||
- targets: ['zitare-backend:3007']
|
||||
metrics_path: '/metrics'
|
||||
scrape_interval: 30s
|
||||
|
||||
# Mukke Backend
|
||||
- job_name: 'mukke-backend'
|
||||
static_configs:
|
||||
- targets: ['mukke-backend:3010']
|
||||
metrics_path: '/metrics'
|
||||
scrape_interval: 30s
|
||||
|
||||
# Planta Backend
|
||||
- job_name: 'planta-backend'
|
||||
static_configs:
|
||||
- targets: ['planta-backend:3022']
|
||||
metrics_path: '/metrics'
|
||||
scrape_interval: 30s
|
||||
|
||||
# Picture Backend
|
||||
- job_name: 'picture-backend'
|
||||
static_configs:
|
||||
- targets: ['picture-backend:3040']
|
||||
metrics_path: '/metrics'
|
||||
scrape_interval: 30s
|
||||
|
||||
# ============================================
|
||||
# Pushgateway (deploy metrics, batch jobs)
|
||||
|
|
|
|||
920
pnpm-lock.yaml
generated
920
pnpm-lock.yaml
generated
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue