managarten/docker/prometheus/prometheus.yml
Till-JS acc8de36ee feat(monitoring): add alerting stack and maintenance scripts
Medium priority stability improvements:

Alerting:
- Add vmalert for evaluating Prometheus alert rules
- Add alertmanager for alert routing and grouping
- Add alert-notifier service for Telegram/ntfy notifications
- Enable cadvisor scraping in prometheus config

Disk Monitoring:
- Add check-disk-space.sh for hourly disk monitoring
- Alert on 80% (warning) and 90% (critical) thresholds
- Auto-cleanup Docker when disk is critical
- Add com.manacore.disk-check.plist for LaunchD

Weekly Reports:
- Add weekly-report.sh for system health summary
- Includes: backup status, disk usage, container health,
  database stats, error log summary
- Runs every Sunday at 10 AM via LaunchD

Health Check Updates:
- Add checks for vmalert, alertmanager, alert-notifier

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-12 13:46:57 +01:00

113 lines
2.8 KiB
YAML

# ManaCore Prometheus Configuration
# Scrapes metrics from all services
#
# Defaults applied to every scrape job and to rule evaluation unless a job
# overrides them with its own setting.
global:
  scrape_interval: 15s
  evaluation_interval: 15s
# Alert rule definitions that Prometheus loads at startup and on reload
rule_files:
- '/etc/prometheus/alerts.yml'
# Alertmanager configuration: firing alerts are sent to this instance
alerting:
  alertmanagers:
  - static_configs:
    - targets: ['alertmanager:9093']
scrape_configs:
# Prometheus self-monitoring
- job_name: 'prometheus'
  static_configs:
  - targets: ['localhost:9090']
# Host system metrics via node-exporter
- job_name: 'node'
  static_configs:
  - targets: ['node-exporter:9100']
  relabel_configs:
  # Replace the instance label (default action: replace) with a fixed host
  # name instead of the exporter's address.
  - source_labels: [__address__]
    target_label: instance
    replacement: 'mac-mini'
# Docker container metrics via cAdvisor
- job_name: 'cadvisor'
  static_configs:
  - targets: ['cadvisor:8080']
# PostgreSQL metrics
- job_name: 'postgres'
  static_configs:
  - targets: ['postgres-exporter:9187']
# Redis metrics
- job_name: 'redis'
  static_configs:
  - targets: ['redis-exporter:9121']
# ============================================
# Application Backends (enabled once the service exposes a /metrics endpoint)
# ============================================
# Auth Service
- job_name: 'mana-core-auth'
  static_configs:
  - targets: ['mana-core-auth:3001']
  metrics_path: '/metrics'
  # Per-job override of the global scrape_interval
  scrape_interval: 30s
# Chat Backend
- job_name: 'chat-backend'
  static_configs:
  - targets: ['chat-backend:3030']
  metrics_path: '/metrics'
  # Per-job override of the global scrape_interval
  scrape_interval: 30s
# Todo Backend
- job_name: 'todo-backend'
  static_configs:
  - targets: ['todo-backend:3031']
  metrics_path: '/metrics'
  # Per-job override of the global scrape_interval
  scrape_interval: 30s
# Calendar Backend
- job_name: 'calendar-backend'
  static_configs:
  - targets: ['calendar-backend:3032']
  metrics_path: '/metrics'
  # Per-job override of the global scrape_interval
  scrape_interval: 30s
# Clock Backend
- job_name: 'clock-backend'
  static_configs:
  - targets: ['clock-backend:3033']
  metrics_path: '/metrics'
  # Per-job override of the global scrape_interval
  scrape_interval: 30s
# Contacts Backend
- job_name: 'contacts-backend'
  static_configs:
  - targets: ['contacts-backend:3034']
  metrics_path: '/metrics'
  # Per-job override of the global scrape_interval
  scrape_interval: 30s
# Storage Backend (disabled - no /metrics endpoint yet)
# - job_name: 'storage-backend'
#   static_configs:
#   - targets: ['storage-backend:3035']
#   metrics_path: '/metrics'
#   scrape_interval: 30s
# Presi Backend (disabled - no /metrics endpoint yet)
# - job_name: 'presi-backend'
#   static_configs:
#   - targets: ['presi-backend:3036']
#   metrics_path: '/metrics'
#   scrape_interval: 30s
# Nutriphi Backend (disabled - no /metrics endpoint yet)
# - job_name: 'nutriphi-backend'
#   static_configs:
#   - targets: ['nutriphi-backend:3037']
#   metrics_path: '/metrics'
#   scrape_interval: 30s