mirror of
https://github.com/Memo-2023/mana-monorepo.git
synced 2026-05-14 21:41:09 +02:00
feat(monitoring): add alerting stack and maintenance scripts
Medium priority stability improvements:

Alerting:
- Add vmalert for evaluating Prometheus alert rules
- Add alertmanager for alert routing and grouping
- Add alert-notifier service for Telegram/ntfy notifications
- Enable cadvisor scraping in prometheus config

Disk Monitoring:
- Add check-disk-space.sh for hourly disk monitoring
- Alert on 80% (warning) and 90% (critical) thresholds
- Auto-cleanup Docker when disk is critical
- Add com.manacore.disk-check.plist for LaunchD

Weekly Reports:
- Add weekly-report.sh for system health summary
- Includes: backup status, disk usage, container health, database stats, error log summary
- Runs every Sunday at 10 AM via LaunchD

Health Check Updates:
- Add checks for vmalert, alertmanager, alert-notifier

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
02a5172c7c
commit
acc8de36ee
11 changed files with 996 additions and 10 deletions
|
|
@@ -17,8 +17,11 @@ for f in *.plist; do launchctl load ~/Library/LaunchAgents/$f; done
|
|||
| Service | Description | Interval |
|
||||
|---------|-------------|----------|
|
||||
| `docker-startup` | Starts Docker containers on boot | At login |
|
||||
| `ensure-containers` | Detects and restarts stuck containers | Every 5 min |
|
||||
| `ensure-containers` | Detects and restarts stuck/crash-looping containers | Every 5 min |
|
||||
| `health-check` | Checks all services and sends alerts | Every 5 min |
|
||||
| `backup-databases` | PostgreSQL backup with daily/weekly rotation | Daily 3 AM |
|
||||
| `disk-check` | Monitors disk space, alerts on thresholds | Hourly |
|
||||
| `weekly-report` | Generates system health summary | Sunday 10 AM |
|
||||
| `ssd-check` | Monitors SSD health | Periodic |
|
||||
| `mana-stt` | Speech-to-text service (Whisper) | At login |
|
||||
| `mana-tts` | Text-to-speech service (Kokoro) | At login |
|
||||
|
|
|
|||
34
scripts/mac-mini/launchd/com.manacore.disk-check.plist
Normal file
34
scripts/mac-mini/launchd/com.manacore.disk-check.plist
Normal file
|
|
@@ -0,0 +1,34 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
|
||||
<plist version="1.0">
|
||||
<dict>
|
||||
<!-- launchd job definition: runs check-disk-space.sh under /bin/bash once when the job is loaded and then every hour. -->
<!-- Label is the unique job identifier launchd uses for this job. -->
<key>Label</key>
|
||||
<string>com.manacore.disk-check</string>
|
||||
|
||||
<!-- Command line: /bin/bash is the executable and the script path is its first argument. -->
<!-- NOTE(review): the script path is absolute and tied to the "mana" user's checkout; update it if the repository moves. -->
<key>ProgramArguments</key>
|
||||
<array>
|
||||
<string>/bin/bash</string>
|
||||
<string>/Users/mana/projects/manacore-monorepo/scripts/mac-mini/check-disk-space.sh</string>
|
||||
</array>
|
||||
|
||||
<!-- Run hourly: StartInterval is expressed in seconds (3600 s = 1 h) -->
|
||||
<key>StartInterval</key>
|
||||
<integer>3600</integer>
|
||||
|
||||
<!-- Also run once as soon as launchd loads the job (at startup), without waiting for the first interval -->
|
||||
<key>RunAtLoad</key>
|
||||
<true/>
|
||||
|
||||
<!-- The job's standard output is written to this file. -->
<key>StandardOutPath</key>
|
||||
<string>/tmp/manacore-disk-check.log</string>
|
||||
|
||||
<!-- The job's standard error is written to a separate file. -->
<key>StandardErrorPath</key>
|
||||
<string>/tmp/manacore-disk-check.error.log</string>
|
||||
|
||||
<!-- Explicit PATH for the job: /usr/local/bin and /opt/homebrew/bin are searched before the system directories. -->
<!-- NOTE(review): /usr/sbin and /sbin are not on this PATH; confirm the script does not need tools from them. -->
<key>EnvironmentVariables</key>
|
||||
<dict>
|
||||
<key>PATH</key>
|
||||
<string>/usr/local/bin:/opt/homebrew/bin:/usr/bin:/bin</string>
|
||||
</dict>
|
||||
</dict>
|
||||
</plist>
|
||||
37
scripts/mac-mini/launchd/com.manacore.weekly-report.plist
Normal file
37
scripts/mac-mini/launchd/com.manacore.weekly-report.plist
Normal file
|
|
@@ -0,0 +1,37 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
|
||||
<plist version="1.0">
|
||||
<dict>
|
||||
<!-- launchd job definition: runs weekly-report.sh under /bin/bash on a weekly calendar schedule. -->
<!-- Label is the unique job identifier launchd uses for this job. -->
<key>Label</key>
|
||||
<string>com.manacore.weekly-report</string>
|
||||
|
||||
<!-- Command line: /bin/bash is the executable and the script path is its first argument. -->
<!-- NOTE(review): the script path is absolute and tied to the "mana" user's checkout; update it if the repository moves. -->
<key>ProgramArguments</key>
|
||||
<array>
|
||||
<string>/bin/bash</string>
|
||||
<string>/Users/mana/projects/manacore-monorepo/scripts/mac-mini/weekly-report.sh</string>
|
||||
</array>
|
||||
|
||||
<!-- Run every Sunday at 10:00 AM (launchd numbers weekdays from 0 = Sunday) -->
<!-- There is no RunAtLoad key here, so loading the job does not by itself trigger a report. -->
|
||||
<key>StartCalendarInterval</key>
|
||||
<dict>
|
||||
<key>Weekday</key>
|
||||
<integer>0</integer>
|
||||
<key>Hour</key>
|
||||
<integer>10</integer>
|
||||
<key>Minute</key>
|
||||
<integer>0</integer>
|
||||
</dict>
|
||||
|
||||
<!-- The job's standard output is written to this file. -->
<key>StandardOutPath</key>
|
||||
<string>/tmp/manacore-weekly-report.log</string>
|
||||
|
||||
<!-- The job's standard error is written to a separate file. -->
<key>StandardErrorPath</key>
|
||||
<string>/tmp/manacore-weekly-report.error.log</string>
|
||||
|
||||
<!-- Explicit PATH for the job: /usr/local/bin and /opt/homebrew/bin are searched before the system directories. -->
<!-- NOTE(review): /usr/sbin and /sbin are not on this PATH; confirm the script does not need tools from them. -->
<key>EnvironmentVariables</key>
|
||||
<dict>
|
||||
<key>PATH</key>
|
||||
<string>/usr/local/bin:/opt/homebrew/bin:/usr/bin:/bin</string>
|
||||
</dict>
|
||||
</dict>
|
||||
</plist>
|
||||
Loading…
Add table
Add a link
Reference in a new issue