# Mana Prometheus Configuration
# Scrapes metrics from all services

global:
  scrape_interval: 15s
  evaluation_interval: 15s

# Load alerting rules
rule_files:
  - /etc/prometheus/alerts.yml

# Alertmanager configuration
alerting:
  alertmanagers:
    - static_configs:
        - targets: ['alertmanager:9093']

scrape_configs:
  # Prometheus self-monitoring
  - job_name: 'prometheus'
    static_configs:
      - targets: ['localhost:9090']

  # Host system metrics via node-exporter
  - job_name: 'node'
    static_configs:
      - targets: ['node-exporter:9100']
    relabel_configs:
      # The replacement is a fixed string, so the instance label is always
      # 'mac-mini' regardless of the scraped address.
      - source_labels: [__address__]
        target_label: instance
        replacement: 'mac-mini'

  # Docker container metrics via cAdvisor
  - job_name: 'cadvisor'
    static_configs:
      - targets: ['cadvisor:8080']

  # PostgreSQL metrics
  - job_name: 'postgres'
    static_configs:
      - targets: ['postgres-exporter:9187']

  # Redis metrics
  - job_name: 'redis'
    static_configs:
      - targets: ['redis-exporter:9121']

  # ============================================
  # Core Services (Hono/Bun + Go)
  # ============================================

  # Auth Service
  - job_name: 'mana-auth'
    static_configs:
      - targets: ['mana-auth:3001']
    metrics_path: '/metrics'
    scrape_interval: 30s

  # Credits Service
  - job_name: 'mana-credits'
    static_configs:
      - targets: ['mana-credits:3002']
    metrics_path: '/metrics'
    scrape_interval: 30s

  # User Service
  - job_name: 'mana-user'
    static_configs:
      - targets: ['mana-user:3062']
    metrics_path: '/metrics'
    scrape_interval: 30s

  # Subscriptions Service
  - job_name: 'mana-subscriptions'
    static_configs:
      - targets: ['mana-subscriptions:3063']
    metrics_path: '/metrics'
    scrape_interval: 30s

  # Analytics Service
  - job_name: 'mana-analytics'
    static_configs:
      - targets: ['mana-analytics:3064']
    metrics_path: '/metrics'
    scrape_interval: 30s

  # ULoad Server
  - job_name: 'uload-server'
    static_configs:
      - targets: ['mana-app-uload-server:3070']
    metrics_path: '/metrics'
    scrape_interval: 30s

  # Memoro Server
  - job_name: 'memoro-server'
    static_configs:
      - targets: ['mana-app-memoro-server:3015']
    metrics_path: '/metrics'
    scrape_interval: 30s

  # NOTE: Individual app backends (chat, todo, calendar, contacts, storage,
  # food, music, plants, picture) have been REMOVED — all migrated to
  # local-first architecture. Only uload-server and memoro-server remain.

  # Mana LLM Gateway (Ollama + Google Fallback)
  - job_name: 'mana-llm'
    static_configs:
      - targets: ['mana-llm:3020']
    metrics_path: '/metrics'
    scrape_interval: 15s

  # Mana Search Service
  - job_name: 'mana-search'
    static_configs:
      - targets: ['mana-search:3012']
    metrics_path: '/metrics'
    scrape_interval: 30s

  # Mana Media Service
  - job_name: 'mana-media'
    static_configs:
      - targets: ['mana-media:3011']
    metrics_path: '/metrics'
    scrape_interval: 30s

  # ============================================
  # GPU Server (Windows PC, LAN: 192.168.178.11)
  # ============================================
  # All GPU jobs share the static label instance='gpu-server'; they are
  # told apart by job name.
  #
  # NOTE(review): gpu-stt, gpu-tts, gpu-image-gen and gpu-video-gen scrape
  # '/health' rather than '/metrics'. A scrape only succeeds if the
  # endpoint serves Prometheus exposition format — confirm that /health
  # does on these services, otherwise `up` will read 0 for them.

  # GPU: LLM Gateway
  - job_name: 'gpu-llm'
    static_configs:
      - targets: ['192.168.178.11:3025']
        labels:
          instance: 'gpu-server'
    metrics_path: '/metrics'
    scrape_interval: 15s

  # GPU: Speech-to-Text (WhisperX)
  - job_name: 'gpu-stt'
    static_configs:
      - targets: ['192.168.178.11:3020']
        labels:
          instance: 'gpu-server'
    metrics_path: '/health'
    scrape_interval: 30s

  # GPU: Text-to-Speech
  - job_name: 'gpu-tts'
    static_configs:
      - targets: ['192.168.178.11:3022']
        labels:
          instance: 'gpu-server'
    metrics_path: '/health'
    scrape_interval: 30s

  # GPU: Image Generation (FLUX.2)
  - job_name: 'gpu-image-gen'
    static_configs:
      - targets: ['192.168.178.11:3023']
        labels:
          instance: 'gpu-server'
    metrics_path: '/health'
    scrape_interval: 30s

  # GPU: Video Generation (LTX-Video)
  - job_name: 'gpu-video-gen'
    static_configs:
      - targets: ['192.168.178.11:3026']
        labels:
          instance: 'gpu-server'
    metrics_path: '/health'
    scrape_interval: 30s

  # ============================================
  # Go Infrastructure Services
  # ============================================

  # API Gateway (Go)
  - job_name: 'mana-api-gateway'
    static_configs:
      - targets: ['mana-api-gateway:3016']
    metrics_path: '/metrics'
    scrape_interval: 15s

  # Sync Server (Go) — local-first data sync
  - job_name: 'mana-sync'
    static_configs:
      - targets: ['mana-core-sync:3051']
    metrics_path: '/metrics'
    scrape_interval: 30s

  # Notification Service (Go) — email, push, webhook
  - job_name: 'mana-notify'
    static_configs:
      - targets: ['mana-core-notify:3013']
    metrics_path: '/metrics'
    scrape_interval: 30s

  # Crawler Service (Go)
  - job_name: 'mana-crawler'
    static_configs:
      - targets: ['mana-crawler:3014']
    metrics_path: '/metrics'
    scrape_interval: 30s

  # ============================================
  # Blackbox Exporter — HTTP Uptime Probes
  # ============================================
  # Every blackbox job below uses the same three relabel rules:
  #   1. copy the listed URL (__address__) into the ?target= query param,
  #   2. expose that URL as the `instance` label,
  #   3. point the actual scrape at blackbox-exporter:9115.

  # Web Apps (Unified Mana app at mana.how + standalone games)
  - job_name: 'blackbox-web'
    metrics_path: /probe
    params:
      module: [http_2xx]
    static_configs:
      - targets:
          # Unified Mana app (all modules as routes)
          - https://mana.how
          - https://mana.how/chat
          - https://mana.how/todo
          - https://mana.how/calendar
          - https://mana.how/contacts
          - https://mana.how/times
          - https://mana.how/photos
          - https://mana.how/picture
          - https://mana.how/storage
          - https://mana.how/presi
          - https://mana.how/food
          - https://mana.how/plants
          - https://mana.how/calc
          - https://mana.how/zitare
          - https://mana.how/cards
          - https://mana.how/skilltree
          - https://mana.how/music
          - https://mana.how/citycorners
          - https://mana.how/memoro
          - https://mana.how/moodlit
          - https://mana.how/context
          - https://mana.how/questions
          - https://mana.how/uload
          - https://mana.how/notes
          - https://mana.how/habits
          - https://mana.how/guides
          - https://mana.how/inventory
          - https://mana.how/body
          - https://mana.how/journal
          - https://mana.how/dreams
          - https://mana.how/firsts
          - https://mana.how/cycles
          - https://mana.how/events
          - https://mana.how/finance
          - https://mana.how/places
          - https://mana.how/who
          - https://mana.how/news
          - https://mana.how/mail
          - https://mana.how/playground
          # Standalone games (separate containers)
          - https://whopxl.mana.how
          - https://arcade.mana.how
    relabel_configs:
      - source_labels: [__address__]
        target_label: __param_target
      - source_labels: [__param_target]
        target_label: instance
      - target_label: __address__
        replacement: blackbox-exporter:9115

  # API Health Endpoints (only services with running containers)
  - job_name: 'blackbox-api'
    metrics_path: /probe
    params:
      module: [http_health]
    static_configs:
      - targets:
          - https://auth.mana.how/health
          - https://api.mana.how/health
    relabel_configs:
      - source_labels: [__address__]
        target_label: __param_target
      - source_labels: [__param_target]
        target_label: instance
      - target_label: __address__
        replacement: blackbox-exporter:9115

  # Internal-only services (not exposed via Cloudflare).
  # Probed over the Docker network so the blackbox exporter reaches
  # them by container name.
  - job_name: 'blackbox-internal'
    metrics_path: /probe
    params:
      module: [http_2xx]
    static_configs:
      - targets:
          # mana-geocoding's own health (Hono wrapper)
          - http://mana-geocoding:3018/health
          # Upstream Pelias health, proxied through the wrapper so the
          # blackbox-exporter doesn't need host.docker.internal access.
          - http://mana-geocoding:3018/health/pelias
    relabel_configs:
      - source_labels: [__address__]
        target_label: __param_target
      - source_labels: [__param_target]
        target_label: instance
      - target_label: __address__
        replacement: blackbox-exporter:9115

  # Infrastructure & Monitoring Tools
  - job_name: 'blackbox-infra'
    metrics_path: /probe
    params:
      module: [http_2xx]
    static_configs:
      - targets:
          - https://git.mana.how
          - https://grafana.mana.how
          - https://stats.mana.how
          - https://glitchtip.mana.how
    relabel_configs:
      - source_labels: [__address__]
        target_label: __param_target
      - source_labels: [__param_target]
        target_label: instance
      - target_label: __address__
        replacement: blackbox-exporter:9115

  # GPU Server Services — probe /health, not /
  # The GPU services (whisper STT, TTS, FLUX image gen) only return 2xx
  # on /health; their root path returns 401/403/404 by design (auth or
  # API-only). Ollama is the exception — its / returns 200, but it has
  # no /health endpoint, so we keep it on / via a separate target.
  - job_name: 'blackbox-gpu'
    metrics_path: /probe
    params:
      module: [http_health]
    static_configs:
      - targets:
          - https://gpu-stt.mana.how/health
          - https://gpu-tts.mana.how/health
          - https://gpu-img.mana.how/health
          - https://gpu-video.mana.how/health
    relabel_configs:
      - source_labels: [__address__]
        target_label: __param_target
      - source_labels: [__param_target]
        target_label: instance
      - target_label: __address__
        replacement: blackbox-exporter:9115

  # Ollama root-path probe (uses http_2xx on / instead of http_health).
  - job_name: 'blackbox-gpu-root'
    metrics_path: /probe
    params:
      module: [http_2xx]
    static_configs:
      - targets:
          - https://gpu-ollama.mana.how
    relabel_configs:
      - source_labels: [__address__]
        target_label: __param_target
      - source_labels: [__param_target]
        target_label: instance
      - target_label: __address__
        replacement: blackbox-exporter:9115

  # ============================================
  # Pushgateway (deploy metrics, batch jobs)
  # ============================================
  - job_name: 'pushgateway'
    # Keep the job/instance labels carried by the pushed metrics instead
    # of overwriting them with this scrape target's own labels.
    honor_labels: true
    static_configs:
      - targets: ['pushgateway:9091']