feat(observability): add mana-search, mana-media, and Synapse to monitoring

- Add Prometheus scraping for mana-search (port 3020, already has metrics)
- Add Prometheus scraping for mana-media (port 3015, MetricsModule added)
- Add Prometheus scraping for Matrix Synapse (port 9002, already enabled)
- Add MetricsModule to mana-media with media_ prefix (excluding /health)
- Update Dockerfile for mana-media to copy the shared-nestjs-metrics
  package into both the builder and runtime stages
- Replace hardcoded ServiceDown alert list with dynamic regex
  (.*-backend|mana-core-auth|mana-search|mana-media|synapse)
- Replace hardcoded backends.json query with dynamic regex
  (mana-core-auth|.*-backend|mana-search|mana-media; synapse not included)
- Add Search, Media, Synapse to master-overview and system-overview dashboards

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Till JS 2026-03-23 10:46:59 +01:00
parent 5bcbb4b71d
commit 143112f77a
9 changed files with 1160 additions and 310 deletions

View file

@ -58,7 +58,7 @@
"targets": [
{
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"expr": "up{job=~\"mana-core-auth|chat-backend|todo-backend|calendar-backend|clock-backend|contacts-backend|storage-backend|presi-backend|nutriphi-backend|skilltree-backend|photos-backend|zitare-backend|mukke-backend|planta-backend|picture-backend\"}",
"expr": "up{job=~\"mana-core-auth|.*-backend|mana-search|mana-media\"}",
"legendFormat": "{{job}}",
"refId": "A"
}

View file

@ -498,6 +498,24 @@
"legendFormat": "Picture",
"refId": "R"
},
{
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"expr": "up{job=\"mana-search\"}",
"legendFormat": "Search",
"refId": "S"
},
{
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"expr": "up{job=\"mana-media\"}",
"legendFormat": "Media",
"refId": "T"
},
{
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"expr": "up{job=\"synapse\"}",
"legendFormat": "Synapse",
"refId": "U"
},
{
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"expr": "pg_up",

View file

@ -625,6 +625,24 @@
"legendFormat": "Picture",
"refId": "R"
},
{
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"expr": "up{job=\"mana-search\"}",
"legendFormat": "Search",
"refId": "S"
},
{
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"expr": "up{job=\"mana-media\"}",
"legendFormat": "Media",
"refId": "T"
},
{
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"expr": "up{job=\"synapse\"}",
"legendFormat": "Synapse",
"refId": "U"
},
{
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"expr": "pg_up",

View file

@ -3,7 +3,7 @@ groups:
rules:
# Service Down Alert
- alert: ServiceDown
expr: up{job=~"mana-core-auth|chat-backend|todo-backend|calendar-backend|clock-backend|contacts-backend|storage-backend|presi-backend|nutriphi-backend|skilltree-backend|photos-backend|zitare-backend|mukke-backend|planta-backend|picture-backend"} == 0
expr: up{job=~"mana-core-auth|.*-backend|mana-search|mana-media|synapse"} == 0
for: 1m
labels:
severity: critical

View file

@ -154,6 +154,31 @@ scrape_configs:
metrics_path: '/metrics'
scrape_interval: 30s
# ============================================
# Core Services
# ============================================
# Mana Search Service
- job_name: 'mana-search'
static_configs:
- targets: ['mana-search:3020']
metrics_path: '/metrics'
scrape_interval: 30s
# Mana Media Service
- job_name: 'mana-media'
static_configs:
- targets: ['mana-media:3015']
metrics_path: '/metrics'
scrape_interval: 30s
# Matrix Synapse
- job_name: 'synapse'
static_configs:
- targets: ['synapse:9002']
metrics_path: '/_synapse/metrics'
scrape_interval: 30s
# ============================================
# Pushgateway (deploy metrics, batch jobs)
# ============================================

1397
pnpm-lock.yaml generated

File diff suppressed because it is too large Load diff

View file

@ -9,6 +9,7 @@ WORKDIR /app
# Copy all necessary files
COPY package.json pnpm-lock.yaml pnpm-workspace.yaml ./
COPY packages/shared-drizzle-config ./packages/shared-drizzle-config
COPY packages/shared-nestjs-metrics ./packages/shared-nestjs-metrics
COPY services/mana-media ./services/mana-media
# Install all dependencies
@ -40,6 +41,7 @@ COPY --from=builder --chown=nestjs:nodejs /app/services/mana-media/apps/api/node
# Copy shared packages that are symlinked
COPY --from=builder --chown=nestjs:nodejs /app/packages/shared-drizzle-config /app/packages/shared-drizzle-config
COPY --from=builder --chown=nestjs:nodejs /app/packages/shared-nestjs-metrics /app/packages/shared-nestjs-metrics
# Copy built application
COPY --from=builder --chown=nestjs:nodejs /app/services/mana-media/apps/api/dist ./dist

View file

@ -13,6 +13,7 @@
"db:studio": "drizzle-kit studio"
},
"dependencies": {
"@manacore/shared-nestjs-metrics": "workspace:*",
"@nestjs/bullmq": "^11.0.0",
"@nestjs/common": "^11.0.0",
"@nestjs/config": "^3.3.0",

View file

@ -1,6 +1,7 @@
import { Module } from '@nestjs/common';
import { ConfigModule } from '@nestjs/config';
import { BullModule } from '@nestjs/bullmq';
import { MetricsModule } from '@manacore/shared-nestjs-metrics';
import { DatabaseModule } from './db/database.module';
import { UploadModule } from './modules/upload/upload.module';
import { StorageModule } from './modules/storage/storage.module';
@ -14,6 +15,10 @@ import { HealthController } from './health.controller';
ConfigModule.forRoot({
isGlobal: true,
}),
MetricsModule.register({
prefix: 'media_',
excludePaths: ['/health'],
}),
BullModule.forRoot({
connection: {
host: process.env.REDIS_HOST || 'localhost',