📈 feat(monitoring): upgrade to VictoriaMetrics + DuckDB analytics

- Replace Prometheus with VictoriaMetrics (2-year retention)
- Add DuckDB analytics module for business KPIs (unlimited retention)
- Add master overview dashboard combining all metrics
- Add business metrics dashboard for user growth tracking
- Add backup script for VictoriaMetrics snapshots and DuckDB
- Add ADR documentation for monitoring stack decision

Analytics API endpoints:
- GET /api/v1/analytics/health - Service health
- GET /api/v1/analytics/latest - Latest metrics snapshot
- GET /api/v1/analytics/growth - User growth over time
- GET /api/v1/analytics/monthly - Monthly aggregates
- POST /api/v1/analytics/snapshot - Manual snapshot trigger
This commit is contained in:
Till-JS 2026-01-28 12:38:04 +01:00
parent 2e7378710f
commit 9dfad0128a
17 changed files with 2901 additions and 18 deletions

View file

@ -171,7 +171,16 @@ ZITARE_DATABASE_URL=postgresql://manacore:devpassword@localhost:5432/zitare
ZITARE_BOT_PORT=3303 ZITARE_BOT_PORT=3303
ZITARE_BOT_DATABASE_URL=postgresql://manacore:devpassword@localhost:5432/zitare_bot ZITARE_BOT_DATABASE_URL=postgresql://manacore:devpassword@localhost:5432/zitare_bot
ZITARE_BOT_TELEGRAM_TOKEN= ZITARE_BOT_TELEGRAM_TOKEN=8489424174:AAHHG_mlLVeu6xAWY6U2ZGXO0D8JKWnqBvg
# ============================================
# TODO TELEGRAM BOT
# ============================================
TODO_BOT_PORT=3304
TODO_BOT_DATABASE_URL=postgresql://manacore:devpassword@localhost:5432/todo_bot
TODO_BOT_TELEGRAM_TOKEN=8363906368:AAHzNC1DPSb0TUb2a3UGWWH1_rrAQFdBv2w
TODO_BOT_API_URL=http://localhost:3018
# ============================================ # ============================================
# PRESI PROJECT # PRESI PROJECT

View file

@ -90,6 +90,10 @@ services:
SMTP_PASSWORD: ${SMTP_PASSWORD} SMTP_PASSWORD: ${SMTP_PASSWORD}
SMTP_FROM: ManaCore <noreply@mana.how> SMTP_FROM: ManaCore <noreply@mana.how>
CORS_ORIGINS: https://mana.how,https://chat.mana.how,https://todo.mana.how,https://calendar.mana.how,https://clock.mana.how,https://contacts.mana.how,https://storage.mana.how,https://presi.mana.how CORS_ORIGINS: https://mana.how,https://chat.mana.how,https://todo.mana.how,https://calendar.mana.how,https://clock.mana.how,https://contacts.mana.how,https://storage.mana.how,https://presi.mana.how
# DuckDB Analytics (Business Metrics)
DUCKDB_PATH: /data/analytics/metrics.duckdb
volumes:
- analytics_data:/data/analytics
ports: ports:
- "3001:3001" - "3001:3001"
healthcheck: healthcheck:
@ -534,23 +538,28 @@ services:
# Monitoring Stack # Monitoring Stack
# ============================================ # ============================================
prometheus: # VictoriaMetrics - High-performance Prometheus replacement
image: prom/prometheus:v2.51.0 # See docs/decisions/001-monitoring-stack-upgrade.md for details
container_name: manacore-prometheus victoriametrics:
image: victoriametrics/victoria-metrics:v1.99.0
container_name: manacore-victoriametrics
restart: always restart: always
command: command:
- '--config.file=/etc/prometheus/prometheus.yml' - '-storageDataPath=/storage'
- '--storage.tsdb.path=/prometheus' - '-retentionPeriod=2y'
- '--storage.tsdb.retention.time=30d' - '-httpListenAddr=:8428'
- '--web.enable-lifecycle' - '-promscrape.config=/etc/prometheus/prometheus.yml'
- '-promscrape.config.strictParse=false'
- '-selfScrapeInterval=15s'
- '-search.latencyOffset=0s'
volumes: volumes:
- ./docker/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro - ./docker/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro
- ./docker/prometheus/alerts.yml:/etc/prometheus/alerts.yml:ro - ./docker/prometheus/alerts.yml:/etc/prometheus/alerts.yml:ro
- prometheus_data:/prometheus - victoriametrics_data:/storage
ports: ports:
- "9090:9090" - "8428:8428"
healthcheck: healthcheck:
test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://127.0.0.1:9090/-/healthy"] test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://127.0.0.1:8428/health"]
interval: 30s interval: 30s
timeout: 10s timeout: 10s
retries: 3 retries: 3
@ -572,7 +581,7 @@ services:
container_name: manacore-grafana container_name: manacore-grafana
restart: always restart: always
depends_on: depends_on:
prometheus: victoriametrics:
condition: service_healthy condition: service_healthy
environment: environment:
GF_SECURITY_ADMIN_USER: admin GF_SECURITY_ADMIN_USER: admin
@ -943,10 +952,12 @@ volumes:
name: manacore-redis name: manacore-redis
minio_data: minio_data:
name: manacore-minio name: manacore-minio
prometheus_data: victoriametrics_data:
name: manacore-prometheus name: manacore-victoriametrics
grafana_data: grafana_data:
name: manacore-grafana name: manacore-grafana
analytics_data:
name: manacore-analytics
n8n_data: n8n_data:
name: manacore-n8n name: manacore-n8n
synapse_data: synapse_data:

View file

@ -0,0 +1,439 @@
{
"annotations": {
"list": []
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 1,
"id": null,
"links": [
{
"asDropdown": false,
"icon": "external link",
"includeVars": false,
"keepTime": true,
"tags": [],
"targetBlank": true,
"title": "Analytics API",
"url": "http://localhost:3001/api/v1/analytics/health",
"type": "link"
}
],
"panels": [
{
"collapsed": false,
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 },
"id": 1,
"panels": [],
"title": "Business KPIs (Long-term Storage via DuckDB)",
"type": "row"
},
{
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"description": "Current total users from Prometheus (real-time)",
"fieldConfig": {
"defaults": {
"color": { "mode": "thresholds" },
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [{ "color": "blue", "value": null }]
},
"unit": "short"
},
"overrides": []
},
"gridPos": { "h": 5, "w": 6, "x": 0, "y": 1 },
"id": 2,
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "horizontal",
"reduceOptions": {
"calcs": ["lastNotNull"],
"fields": "",
"values": false
},
"textMode": "auto"
},
"pluginVersion": "10.0.0",
"targets": [
{
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"expr": "auth_users_total",
"legendFormat": "Total Users",
"refId": "A"
}
],
"title": "Total Users (Real-time)",
"type": "stat"
},
{
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"description": "Verified users from Prometheus",
"fieldConfig": {
"defaults": {
"color": { "mode": "thresholds" },
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [{ "color": "green", "value": null }]
},
"unit": "short"
},
"overrides": []
},
"gridPos": { "h": 5, "w": 6, "x": 6, "y": 1 },
"id": 3,
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "horizontal",
"reduceOptions": {
"calcs": ["lastNotNull"],
"fields": "",
"values": false
},
"textMode": "auto"
},
"pluginVersion": "10.0.0",
"targets": [
{
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"expr": "auth_users_verified",
"legendFormat": "Verified",
"refId": "A"
}
],
"title": "Verified Users",
"type": "stat"
},
{
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"description": "Verification rate percentage",
"fieldConfig": {
"defaults": {
"color": { "mode": "thresholds" },
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "red", "value": null },
{ "color": "yellow", "value": 0.5 },
{ "color": "green", "value": 0.8 }
]
},
"unit": "percentunit"
},
"overrides": []
},
"gridPos": { "h": 5, "w": 6, "x": 12, "y": 1 },
"id": 4,
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "horizontal",
"reduceOptions": {
"calcs": ["lastNotNull"],
"fields": "",
"values": false
},
"textMode": "auto"
},
"pluginVersion": "10.0.0",
"targets": [
{
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"expr": "auth_users_verified / auth_users_total",
"legendFormat": "Verification Rate",
"refId": "A"
}
],
"title": "Verification Rate",
"type": "stat"
},
{
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"description": "New users registered today",
"fieldConfig": {
"defaults": {
"color": { "mode": "thresholds" },
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [{ "color": "orange", "value": null }]
},
"unit": "short"
},
"overrides": []
},
"gridPos": { "h": 5, "w": 6, "x": 18, "y": 1 },
"id": 5,
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "horizontal",
"reduceOptions": {
"calcs": ["lastNotNull"],
"fields": "",
"values": false
},
"textMode": "auto"
},
"pluginVersion": "10.0.0",
"targets": [
{
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"expr": "auth_users_created_today",
"legendFormat": "Today",
"refId": "A"
}
],
"title": "New Users Today",
"type": "stat"
},
{
"collapsed": false,
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 6 },
"id": 6,
"panels": [],
"title": "User Growth Trends",
"type": "row"
},
{
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"description": "User growth over the selected time range",
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 20,
"gradientMode": "opacity",
"hideFrom": { "legend": false, "tooltip": false, "viz": false },
"insertNulls": false,
"lineInterpolation": "smooth",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": { "type": "linear" },
"showPoints": "auto",
"spanNulls": false,
"stacking": { "group": "A", "mode": "none" },
"thresholdsStyle": { "mode": "off" }
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [{ "color": "green", "value": null }]
},
"unit": "short"
},
"overrides": []
},
"gridPos": { "h": 10, "w": 16, "x": 0, "y": 7 },
"id": 7,
"options": {
"legend": {
"calcs": ["lastNotNull", "min", "max"],
"displayMode": "table",
"placement": "bottom",
"showLegend": true
},
"tooltip": { "mode": "multi", "sort": "desc" }
},
"pluginVersion": "10.0.0",
"targets": [
{
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"expr": "auth_users_total",
"legendFormat": "Total Users",
"refId": "A"
},
{
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"expr": "auth_users_verified",
"legendFormat": "Verified Users",
"refId": "B"
}
],
"title": "User Growth Over Time",
"type": "timeseries"
},
{
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"description": "New registrations by period",
"fieldConfig": {
"defaults": {
"color": { "mode": "thresholds" },
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [{ "color": "purple", "value": null }]
},
"unit": "short"
},
"overrides": []
},
"gridPos": { "h": 5, "w": 8, "x": 16, "y": 7 },
"id": 8,
"options": {
"colorMode": "value",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "horizontal",
"reduceOptions": {
"calcs": ["lastNotNull"],
"fields": "",
"values": false
},
"textMode": "value_and_name"
},
"pluginVersion": "10.0.0",
"targets": [
{
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"expr": "auth_users_created_today",
"legendFormat": "Today",
"refId": "A"
},
{
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"expr": "auth_users_created_this_week",
"legendFormat": "This Week",
"refId": "B"
},
{
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"expr": "auth_users_created_this_month",
"legendFormat": "This Month",
"refId": "C"
}
],
"title": "New Registrations",
"type": "stat"
},
{
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"description": "Verification rate over time",
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 20,
"gradientMode": "opacity",
"hideFrom": { "legend": false, "tooltip": false, "viz": false },
"insertNulls": false,
"lineInterpolation": "smooth",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": { "type": "linear" },
"showPoints": "auto",
"spanNulls": false,
"stacking": { "group": "A", "mode": "none" },
"thresholdsStyle": { "mode": "off" }
},
"mappings": [],
"max": 1,
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [{ "color": "green", "value": null }]
},
"unit": "percentunit"
},
"overrides": []
},
"gridPos": { "h": 5, "w": 8, "x": 16, "y": 12 },
"id": 9,
"options": {
"legend": {
"calcs": ["mean"],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": { "mode": "single", "sort": "none" }
},
"pluginVersion": "10.0.0",
"targets": [
{
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"expr": "auth_users_verified / auth_users_total",
"legendFormat": "Verification Rate",
"refId": "A"
}
],
"title": "Verification Rate Trend",
"type": "timeseries"
},
{
"collapsed": false,
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 17 },
"id": 10,
"panels": [],
"title": "Data Retention Info",
"type": "row"
},
{
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"gridPos": { "h": 4, "w": 24, "x": 0, "y": 18 },
"id": 11,
"options": {
"code": {
"language": "plaintext",
"showLineNumbers": false,
"showMiniMap": false
},
"content": "## Data Retention Policy\n\n| Data Source | Retention | Purpose |\n|-------------|-----------|----------|\n| **VictoriaMetrics** | 2 Years | Operational metrics (CPU, Memory, Requests, Latency) |\n| **DuckDB** | Unlimited | Business KPIs (User growth, Feature usage) |\n\n**API Endpoints:**\n- `GET /api/v1/analytics/health` - Service health\n- `GET /api/v1/analytics/latest` - Latest metrics snapshot\n- `GET /api/v1/analytics/growth?days=90` - User growth data\n- `GET /api/v1/analytics/monthly?months=12` - Monthly aggregates\n- `POST /api/v1/analytics/snapshot` - Trigger manual snapshot",
"mode": "markdown"
},
"pluginVersion": "10.4.1",
"title": "About Business Metrics",
"type": "text"
}
],
"refresh": "1m",
"schemaVersion": 38,
"tags": ["manacore", "business", "kpi", "duckdb"],
"templating": {
"list": [
{
"current": { "selected": false, "text": "Prometheus", "value": "Prometheus" },
"hide": 0,
"includeAll": false,
"multi": false,
"name": "datasource",
"options": [],
"query": "prometheus",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"type": "datasource"
}
]
},
"time": { "from": "now-30d", "to": "now" },
"timepicker": {},
"timezone": "browser",
"title": "Business Metrics",
"uid": "business-metrics",
"version": 1,
"weekStart": ""
}

File diff suppressed because it is too large Load diff

View file

@ -1,5 +1,6 @@
# Grafana Datasource Provisioning # Grafana Datasource Provisioning
# Auto-configures Prometheus as the default datasource # Auto-configures VictoriaMetrics as the default datasource
# VictoriaMetrics is 100% Prometheus-compatible, using same type
apiVersion: 1 apiVersion: 1
@ -7,9 +8,19 @@ datasources:
- name: Prometheus - name: Prometheus
type: prometheus type: prometheus
access: proxy access: proxy
url: http://prometheus:9090 url: http://victoriametrics:8428
isDefault: true isDefault: true
editable: true editable: true
jsonData: jsonData:
timeInterval: "15s" timeInterval: "15s"
httpMethod: POST httpMethod: POST
# Business Metrics API (DuckDB via mana-core-auth)
- name: Business Metrics
type: yesoreyeram-infinity-datasource
access: proxy
url: http://mana-core-auth:3001
isDefault: false
editable: true
jsonData:
datasource_mode: "basic"

View file

@ -0,0 +1,593 @@
# ADR-001: Monitoring Stack Upgrade - VictoriaMetrics + DuckDB
**Status:** Accepted
**Date:** 2026-01-28
**Author:** Till Schneider
**Reviewers:** -
## Executive Summary
Upgrade des ManaCore Monitoring Stacks von Prometheus (30 Tage Retention) auf VictoriaMetrics (2 Jahre) + DuckDB (unbegrenzt) für langfristige Metriken-Speicherung und Business-Analytics.
---
## 1. Kontext & Problemstellung
### 1.1 Aktuelle Situation
ManaCore nutzt einen Standard-Prometheus + Grafana Stack für Monitoring:
```
┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐
│ NestJS Backends│────>│ Prometheus │────>│ Grafana │
│ (6 Services) │ │ (30 Tage) │ │ (5 Dashboards) │
└─────────────────┘ └─────────────────┘ └─────────────────┘
├── mana-core-auth (Port 3001)
├── chat-backend (Port 3002)
├── todo-backend (Port 3018)
├── calendar-backend (Port 3016)
├── clock-backend (Port 3017)
└── contacts-backend (Port 3015)
```
**Komponenten:**
- Prometheus v2.51.0 mit 30 Tagen Retention
- Grafana 10.4.1 mit 5 Dashboards
- Node Exporter, cAdvisor, PostgreSQL Exporter, Redis Exporter
- Alerting Rules (20+ Regeln)
### 1.2 Das Problem
**Nach 30 Tagen sind alle historischen Metriken unwiederbringlich verloren.**
| Betroffene Daten | Konsequenz |
|------------------|------------|
| User-Wachstum (`auth_users_total`) | Keine Trend-Analyse möglich |
| Historische Error Rates | Keine Langzeit-Vergleiche |
| Performance-Trends | Keine Kapazitätsplanung |
| Infrastruktur-Metriken | Keine saisonalen Muster erkennbar |
**Besonders kritisch:** Business-KPIs wie `auth_users_total`, `auth_users_created_this_month` sind Point-in-Time Snapshots. Ohne historische Daten ist es unmöglich zu rekonstruieren, wie viele User vor 2 Monaten existierten.
### 1.3 Anforderungen
| Anforderung | Priorität |
|-------------|-----------|
| Operative Metriken für mindestens 1-2 Jahre speichern | Hoch |
| Business-KPIs unbegrenzt speichern | Hoch |
| Keine Änderung an bestehenden Dashboards | Mittel |
| Minimaler zusätzlicher Ressourcenverbrauch | Mittel |
| Einfache Wartung und Backup | Mittel |
---
## 2. Evaluierte Optionen
### 2.1 Option A: Prometheus Retention erhöhen
**Ansatz:** `--storage.tsdb.retention.time=365d`
**Vorteile:**
- Keine Migration nötig
- Keine neuen Komponenten
**Nachteile:**
- Prometheus TSDB ist nicht für Langzeit optimiert
- RAM-Verbrauch steigt linear mit Retention
- Queries über alte Daten werden langsam
- Compaction-Overhead bei großen Datenmengen
**Bewertung:** Kurzfristige Lösung, skaliert nicht.
### 2.2 Option B: Thanos / Cortex
**Ansatz:** Prometheus + Langzeit-Storage-Layer (S3/MinIO)
**Vorteile:**
- Industriestandard für große Deployments
- Unbegrenzte Retention möglich
**Nachteile:**
- Hohe Komplexität (5+ zusätzliche Komponenten)
- Overkill für ManaCore's Größe (~50k Time Series)
- Signifikanter Ops-Overhead
**Bewertung:** Overengineered für unseren Use Case.
### 2.3 Option C: VictoriaMetrics (gewählt)
**Ansatz:** Drop-in Replacement für Prometheus
**Vorteile:**
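- Weitgehend Prometheus-kompatibel (PromQL via MetricsQL, Scrape-Config-Format, Exporters); Alerting- und Recording-Rules werden von Single-Node-VictoriaMetrics nicht ausgewertet und erfordern zusätzlich `vmalert`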
- 3-10x bessere Kompression
- 5-10x weniger RAM-Verbrauch
- Schnellere Queries über historische Daten
- Single Binary, einfaches Deployment
- Migration in 10 Minuten
**Nachteile:**
- Weniger bekannt als Prometheus (aber wachsende Community)
- Kein CNCF-Projekt (Prometheus dagegen ist CNCF Graduated)
**Bewertung:** Beste Balance aus Einfachheit und Leistung.
### 2.4 Option D: PostgreSQL für Business-Metriken
**Ansatz:** Tägliche Snapshots in PostgreSQL speichern
**Vorteile:**
- Bestehende Infrastruktur nutzen
- SQL für Queries
- Unbegrenzte Retention
**Nachteile:**
- Nicht optimiert für Analytics-Queries
- Connection-Pool Overhead
- Row-based Storage ineffizient für Aggregationen
**Bewertung:** Funktional, aber nicht optimal für Analytics.
### 2.5 Option E: DuckDB für Business-Metriken (gewählt)
**Ansatz:** Embedded OLAP-Datenbank für tägliche Business-KPI Snapshots
**Vorteile:**
- Kein Server nötig (embedded, single file)
- Column-oriented = perfekt für Analytics
- 10-100x schneller als PostgreSQL für Aggregationen
- Exzellente Kompression
- Native Parquet Import/Export
- SQL-kompatibel
**Nachteile:**
- Nicht für concurrent writes (irrelevant bei 1x täglich)
- Keine native Grafana-Integration (API-Endpoint nötig)
**Bewertung:** Perfekt für den Use Case (append-only, read-heavy, analytics).
---
## 3. Entscheidung
### 3.1 Gewählte Architektur
```
┌─────────────────────────────────────────────────────────────────────────┐
│ ManaCore Monitoring Stack v2 │
├─────────────────────────────────────────────────────────────────────────┤
│ │
│ OPERATIVE METRIKEN (High-Frequency Time Series) │
│ ════════════════════════════════════════════════ │
│ │
│ ┌──────────────┐ ┌──────────────────┐ ┌─────────────────┐ │
│ │ Backends │────>│ VictoriaMetrics │────>│ Grafana │ │
│ │ /metrics │ │ │ │ │ │
│ │ │ │ Retention: 2y │ │ Existing │ │
│ │ + Exporters │ │ Scrape: 15-30s │ │ Dashboards │ │
│ └──────────────┘ └──────────────────┘ └─────────────────┘ │
│ ▲ │
│ │ │
│ BUSINESS METRIKEN (Daily Snapshots, Analytics) │ │
│ ══════════════════════════════════════════════ │ │
│ │ │
│ ┌──────────────┐ ┌──────────────────┐ │ │
│ │ Daily Cron │────>│ DuckDB │────────────┘ │
│ │ 00:00 UTC │ │ │ (via JSON API) │
│ │ │ │ Retention: ∞ │ │
│ │ Snapshots: │ │ File: metrics.db│ │
│ │ - Users │ │ Size: ~10MB/year│ │
│ │ - Growth │ │ │ │
│ │ - Features │ │ Backup: cp file │ │
│ └──────────────┘ └──────────────────┘ │
│ │ │
│ ▼ │
│ ┌──────────────┐ │
│ │Parquet Export│ │
│ │ (Archiv) │ │
│ └──────────────┘ │
│ │
└─────────────────────────────────────────────────────────────────────────┘
```
### 3.2 Daten-Aufteilung
| Datentyp | Storage | Retention | Grund |
|----------|---------|-----------|-------|
| CPU, Memory, Disk | VictoriaMetrics | 2 Jahre | High-frequency, Time-Series |
| HTTP Requests, Latency | VictoriaMetrics | 2 Jahre | High-frequency, PromQL |
| Error Rates, Status Codes | VictoriaMetrics | 2 Jahre | Alerting, Debugging |
| Container Metrics | VictoriaMetrics | 2 Jahre | Kapazitätsplanung |
| **User Counts** | DuckDB | Unbegrenzt | Business KPI, Trend-Analyse |
| **User Growth** | DuckDB | Unbegrenzt | Business KPI |
| **Feature Usage** | DuckDB | Unbegrenzt | Product Analytics |
| **Revenue/Subscriptions** | DuckDB | Unbegrenzt | Business KPI |
### 3.3 Warum diese Kombination?
**VictoriaMetrics für operative Metriken:**
- Prometheus-kompatibel = keine Dashboard-Änderungen
- 2 Jahre Retention bei ~15GB Storage
- Schnelle Queries auch über historische Daten
- Bewährte Time-Series Datenbank
**DuckDB für Business-Metriken:**
- Perfekt für "1x täglich schreiben, oft lesen"
- SQL für komplexe Analytics-Queries
- Single-File = triviales Backup
- Kein zusätzlicher Server/Container
- Unbegrenzte Retention bei minimalem Footprint
---
## 4. Technische Details
### 4.1 VictoriaMetrics Konfiguration
```yaml
# docker-compose.macmini.yml
services:
victoriametrics:
image: victoriametrics/victoria-metrics:v1.99.0
container_name: victoriametrics
restart: unless-stopped
command:
- '-storageDataPath=/storage'
- '-retentionPeriod=2y'
- '-httpListenAddr=:8428'
- '-promscrape.config=/etc/prometheus/prometheus.yml'
- '-promscrape.config.strictParse=false'
volumes:
- vm-storage:/storage
- ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro
- ./prometheus/alerts.yml:/etc/prometheus/alerts.yml:ro
ports:
- "8428:8428"
networks:
- manacore-network
```
**Ressourcen-Vergleich (geschätzt):**
| Metrik | Prometheus (30d) | VictoriaMetrics (2y) |
|--------|------------------|----------------------|
| RAM | ~2 GB | ~500 MB |
| Disk | ~5 GB | ~15 GB |
| CPU | Höher (Compaction) | Niedriger |
### 4.2 DuckDB Schema
```sql
-- Haupt-Tabelle für tägliche Snapshots
CREATE TABLE daily_metrics (
date DATE PRIMARY KEY,
-- User Metrics
total_users INTEGER NOT NULL,
verified_users INTEGER NOT NULL,
new_users_today INTEGER NOT NULL,
new_users_week INTEGER NOT NULL,
new_users_month INTEGER NOT NULL,
-- Engagement (Platzhalter für Zukunft)
daily_active_users INTEGER,
weekly_active_users INTEGER,
monthly_active_users INTEGER,
-- Per-App Metrics (Platzhalter)
chat_messages_sent INTEGER,
pictures_generated INTEGER,
-- Infrastructure Snapshots
total_db_size_bytes BIGINT,
total_storage_size_bytes BIGINT,
-- Metadata
recorded_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
-- Index für schnelle Range-Queries
CREATE INDEX idx_daily_metrics_date ON daily_metrics(date);
-- Monthly aggregation view.
-- total_users_eom is the total_users value of the latest snapshot in each
-- month (ARG_MAX picks the row with the greatest date). MAX(total_users)
-- would return the monthly peak instead, which differs from the end-of-month
-- value whenever the user count decreases within a month.
CREATE VIEW monthly_metrics AS
SELECT
  DATE_TRUNC('month', date) AS month,
  ARG_MAX(total_users, date) AS total_users_eom,
  SUM(new_users_today) AS new_users,
  AVG(daily_active_users) AS avg_dau
FROM daily_metrics
GROUP BY DATE_TRUNC('month', date);
```
### 4.3 DuckDB Service Implementation
```typescript
// services/mana-core-auth/src/analytics/analytics.service.ts
/**
 * Stores one row of business KPIs per day in an embedded DuckDB file and
 * answers growth queries over the stored history.
 */
@Injectable()
export class AnalyticsService {
  private readonly db: Database;

  constructor(
    private readonly usersService: UsersService,
    private readonly configService: ConfigService,
  ) {
    const dbPath = this.configService.get('DUCKDB_PATH', '/data/metrics.duckdb');
    this.db = new Database(dbPath);
    this.initializeSchema();
  }

  /**
   * Writes (or overwrites) the snapshot row for the current UTC date.
   *
   * The schedule is pinned to UTC so that it agrees with the UTC date derived
   * from `toISOString()` below, independent of the host timezone.
   *
   * NOTE(review): the job fires at 00:00, i.e. at the very start of the day it
   * labels the row with. If `countCreatedToday()` counts users created since
   * midnight, `new_users_today` will be close to zero in every snapshot.
   * Confirm the intended semantics (e.g. run at end of day, or store the
   * figures under the previous date).
   */
  @Cron('0 0 * * *', { timeZone: 'UTC' }) // Daily at midnight UTC
  async recordDailySnapshot(): Promise<void> {
    const today = new Date().toISOString().split('T')[0];

    // The individual counts do not depend on each other, so fetch them in parallel.
    const [
      total_users,
      verified_users,
      new_users_today,
      new_users_week,
      new_users_month,
      total_db_size_bytes,
    ] = await Promise.all([
      this.usersService.countTotal(),
      this.usersService.countVerified(),
      this.usersService.countCreatedToday(),
      this.usersService.countCreatedThisWeek(),
      this.usersService.countCreatedThisMonth(),
      this.getDbSize(),
    ]);

    // INSERT OR REPLACE keeps the job idempotent: re-running it on the same
    // date replaces the existing row instead of violating the primary key.
    this.db.run(`
      INSERT OR REPLACE INTO daily_metrics
      (date, total_users, verified_users, new_users_today,
      new_users_week, new_users_month, total_db_size_bytes)
      VALUES (?, ?, ?, ?, ?, ?, ?)
    `, [
      today,
      total_users,
      verified_users,
      new_users_today,
      new_users_week,
      new_users_month,
      total_db_size_bytes,
    ]);
  }

  /**
   * Returns the daily user totals for the trailing window together with the
   * day-over-day difference.
   *
   * @param months Size of the trailing window in months; must be a positive integer.
   * @returns One entry per stored snapshot inside the window, ordered by date.
   * @throws RangeError if `months` is not a positive integer.
   */
  async getUserGrowth(months: number = 12): Promise<GrowthData[]> {
    // `months` is interpolated into the SQL text below, so it must be proven
    // to be a plain positive integer first. At runtime the value may originate
    // from an unvalidated request parameter and would otherwise allow SQL
    // injection or produce a malformed INTERVAL literal.
    if (!Number.isInteger(months) || months <= 0) {
      throw new RangeError(`months must be a positive integer, got: ${String(months)}`);
    }

    return this.db.all(`
      SELECT
      date,
      total_users,
      total_users - LAG(total_users) OVER (ORDER BY date) as growth
      FROM daily_metrics
      WHERE date > CURRENT_DATE - INTERVAL '${months} months'
      ORDER BY date
    `);
  }
}
```
### 4.4 Grafana Integration
**VictoriaMetrics:**
```yaml
# docker/grafana/provisioning/datasources/prometheus.yml
apiVersion: 1
datasources:
- name: Prometheus
type: prometheus
url: http://victoriametrics:8428 # Nur URL ändert sich
isDefault: true
editable: false
```
**DuckDB (via JSON API):**
```yaml
# docker/grafana/provisioning/datasources/duckdb.yml
apiVersion: 1
datasources:
- name: Business Metrics
type: simpod-json-datasource
url: http://mana-core-auth:3001/api/analytics
isDefault: false
editable: false
```
---
## 5. Migration
### 5.1 Migrationspfad
```
Phase 1: VictoriaMetrics Deployment (Zero Downtime)
═══════════════════════════════════════════════════
1. VictoriaMetrics Container hinzufügen
2. Parallel zu Prometheus laufen lassen
3. Grafana Datasource auf VM umstellen
4. Prometheus Container entfernen
Phase 2: DuckDB Integration
═══════════════════════════
1. DuckDB Dependency hinzufügen
2. Analytics Service implementieren
3. Cron-Job aktivieren
4. API Endpoints erstellen
5. Grafana Dashboard für Business Metrics
Phase 3: Historische Daten (Optional)
═════════════════════════════════════
1. Prometheus Daten exportieren
2. In VictoriaMetrics importieren
3. Initiale DuckDB-Befüllung aus Prometheus
```
### 5.2 Rollback-Plan
**VictoriaMetrics → Prometheus:**
- Gleiche Config-Datei funktioniert
- Grafana Datasource URL zurückändern
- Container tauschen
**DuckDB:**
- Service deaktivieren
- Keine Abhängigkeiten in anderen Services
---
## 6. Monitoring & Alerting
### 6.1 VictoriaMetrics Self-Monitoring
```yaml
# prometheus/alerts.yml (funktioniert auch mit VM)
groups:
- name: victoriametrics
rules:
- alert: VMStorageSpaceLow
expr: vm_free_disk_space_bytes / vm_available_disk_space_bytes < 0.1
for: 5m
labels:
severity: warning
annotations:
summary: "VictoriaMetrics disk space low"
```
### 6.2 DuckDB Health Check
```typescript
// Endpoint: GET /api/analytics/health
{
"status": "healthy",
"database_size_bytes": 10485760,
"last_snapshot": "2025-01-28",
"total_records": 365
}
```
---
## 7. Backup-Strategie
### 7.1 VictoriaMetrics
```bash
# Snapshot erstellen (built-in)
curl -X POST "http://victoriametrics:8428/snapshot/create"
# Backup zu S3/MinIO
vmbackup -storageDataPath=/storage -snapshot.createURL="http://localhost:8428/snapshot/create" -dst=s3://backups/vm/
```
### 7.2 DuckDB
```bash
# Einfacher File-Copy (konsistent da single-writer)
cp /data/metrics.duckdb /backup/metrics-$(date +%Y-%m-%d).duckdb
# Oder Parquet-Export für Archivierung
duckdb /data/metrics.duckdb -c "COPY daily_metrics TO '/backup/metrics.parquet' (FORMAT PARQUET)"
```
---
## 8. Kosten & Ressourcen
### 8.1 Storage-Projektion (2 Jahre)
| Komponente | Jetzt | Nach Migration |
|------------|-------|----------------|
| Prometheus | 5 GB (30d) | 0 GB (entfernt) |
| VictoriaMetrics | 0 GB | ~15 GB (2y) |
| DuckDB | 0 GB | ~20 MB (2y) |
| **Total** | **5 GB** | **~15 GB** |
### 8.2 RAM-Projektion
| Komponente | Jetzt | Nach Migration |
|------------|-------|----------------|
| Prometheus | ~2 GB | 0 GB |
| VictoriaMetrics | 0 GB | ~500 MB |
| DuckDB | 0 GB | ~50 MB (on-demand) |
| **Total** | **~2 GB** | **~550 MB** |
---
## 9. Implementierungsplan
### Phase 1: VictoriaMetrics (Tag 1)
- [ ] docker-compose.macmini.yml aktualisieren
- [ ] VictoriaMetrics Container hinzufügen
- [ ] Grafana Datasource konfigurieren
- [ ] Bestehende Dashboards testen
- [ ] Prometheus Container entfernen
### Phase 2: DuckDB Service (Tag 1-2)
- [ ] duckdb Package installieren
- [ ] AnalyticsModule erstellen
- [ ] DuckDB Schema initialisieren
- [ ] Daily Snapshot Cron-Job
- [ ] API Endpoints für Grafana
### Phase 3: Dashboards & Dokumentation (Tag 2)
- [ ] Business Metrics Dashboard erstellen
- [ ] Master Overview Dashboard aktualisieren
- [ ] Dokumentation finalisieren
- [ ] Backup-Scripts erstellen
---
## 10. Entscheidungsmatrix
| Kriterium | Gewicht | Prometheus | VM + DuckDB | Score |
|-----------|---------|------------|-------------|-------|
| Langzeit-Retention | 30% | 2/10 | 10/10 | +2.4 |
| Ressourceneffizienz | 20% | 4/10 | 9/10 | +1.0 |
| Migrationsaufwand | 15% | 10/10 | 8/10 | -0.3 |
| Wartbarkeit | 15% | 7/10 | 8/10 | +0.15 |
| Analytics-Fähigkeit | 10% | 3/10 | 9/10 | +0.6 |
| Backup-Einfachheit | 10% | 5/10 | 9/10 | +0.4 |
| **Gesamt** | 100% | **4.75/10** | **9.0/10** | **+4.25** |
---
## 11. Risiken & Mitigationen
| Risiko | Wahrscheinlichkeit | Impact | Mitigation |
|--------|-------------------|--------|------------|
| VM nicht 100% PromQL-kompatibel | Niedrig | Mittel | Dashboards vorab testen |
| DuckDB Datenverlust | Niedrig | Hoch | Tägliches Backup |
| Cron-Job Ausfall | Mittel | Niedrig | Monitoring + Catch-up Logic |
| Storage voll | Niedrig | Mittel | Alerting bei 80% |
---
## 12. Referenzen
- [VictoriaMetrics Dokumentation](https://docs.victoriametrics.com/)
- [VictoriaMetrics vs Prometheus Benchmark](https://valyala.medium.com/prometheus-vs-victoriametrics-benchmark-on-node-exporter-metrics-4ca29c75590f)
- [DuckDB Dokumentation](https://duckdb.org/docs/)
- [Grafana JSON Datasource](https://grafana.com/grafana/plugins/simpod-json-datasource/)
---
## Appendix A: Bestehende Dashboards
| Dashboard | UID | Änderung nötig |
|-----------|-----|----------------|
| System Overview | `system-overview` | Keine |
| Backends & Docker | `backends-docker` | Keine |
| Application Details | `application-details` | Keine |
| Database Details | `database-details` | Keine |
| User Statistics | `user-statistics` | Keine |
| Master Overview | `master-overview` | Business Metrics hinzufügen |
## Appendix B: Prometheus Config Kompatibilität
Die bestehende `prometheus.yml` funktioniert ohne Änderung mit VictoriaMetrics:
```yaml
# Alle Scrape-Configs bleiben identisch
scrape_configs:
- job_name: 'mana-core-auth'
static_configs:
- targets: ['mana-core-auth:3001']
metrics_path: '/metrics'
scrape_interval: 30s
# ... alle anderen Jobs
```

View file

@ -20,6 +20,7 @@
"setup:db": "./scripts/setup-databases.sh", "setup:db": "./scripts/setup-databases.sh",
"setup:db:chat": "./scripts/setup-databases.sh chat", "setup:db:chat": "./scripts/setup-databases.sh chat",
"setup:db:auth": "./scripts/setup-databases.sh auth", "setup:db:auth": "./scripts/setup-databases.sh auth",
"seed:dev-user": "pnpm --filter mana-core-auth db:seed:dev",
"build:packages": "pnpm --filter '@manacore/*' build", "build:packages": "pnpm --filter '@manacore/*' build",
"postinstall": "node scripts/generate-env.mjs || true && pnpm run build:packages || true", "postinstall": "node scripts/generate-env.mjs || true && pnpm run build:packages || true",
"manacore:dev": "turbo run dev --filter=manacore...", "manacore:dev": "turbo run dev --filter=manacore...",
@ -212,6 +213,18 @@
"dev:projectdoc:full": "./scripts/setup-databases.sh projectdoc && pnpm dev:projectdoc", "dev:projectdoc:full": "./scripts/setup-databases.sh projectdoc && pnpm dev:projectdoc",
"projectdoc:db:push": "pnpm --filter @manacore/telegram-project-doc-bot db:push", "projectdoc:db:push": "pnpm --filter @manacore/telegram-project-doc-bot db:push",
"projectdoc:db:studio": "pnpm --filter @manacore/telegram-project-doc-bot db:studio", "projectdoc:db:studio": "pnpm --filter @manacore/telegram-project-doc-bot db:studio",
"dev:zitare-bot": "pnpm --filter @manacore/telegram-zitare-bot start:dev",
"dev:zitare-bot:full": "./scripts/setup-databases.sh zitare_bot && pnpm dev:zitare-bot",
"zitare-bot:db:push": "pnpm --filter @manacore/telegram-zitare-bot db:push",
"zitare-bot:db:studio": "pnpm --filter @manacore/telegram-zitare-bot db:studio",
"dev:todo-bot": "pnpm --filter @manacore/telegram-todo-bot start:dev",
"dev:todo-bot:full": "./scripts/setup-databases.sh todo_bot && ./scripts/setup-databases.sh todo && ./scripts/setup-databases.sh auth && concurrently -n auth,todo-be,bot -c blue,green,cyan \"pnpm dev:auth\" \"pnpm dev:todo:backend\" \"pnpm dev:todo-bot\"",
"todo-bot:db:push": "pnpm --filter @manacore/telegram-todo-bot db:push",
"todo-bot:db:studio": "pnpm --filter @manacore/telegram-todo-bot db:studio",
"dev:nutriphi-bot": "pnpm --filter @manacore/telegram-nutriphi-bot start:dev",
"dev:nutriphi-bot:full": "./scripts/setup-databases.sh nutriphi_bot && pnpm dev:nutriphi-bot",
"nutriphi-bot:db:push": "pnpm --filter @manacore/telegram-nutriphi-bot db:push",
"nutriphi-bot:db:studio": "pnpm --filter @manacore/telegram-nutriphi-bot db:studio",
"prepare": "husky" "prepare": "husky"
}, },
"devDependencies": { "devDependencies": {

130
scripts/backup-monitoring.sh Executable file
View file

@ -0,0 +1,130 @@
#!/bin/bash
# Backup script for the ManaCore monitoring stack.
# Backs up:
# - VictoriaMetrics (2 years of metrics)
# - DuckDB (business KPIs)
#
# Environment overrides:
#   BACKUP_DIR      target directory for all backup artefacts (default: /backup/monitoring)
#   RETENTION_DAYS  age in days after which old backup files are removed (default: 30)
# Abort on the first command that fails outside of a tested condition.
set -e
# Configuration
BACKUP_DIR="${BACKUP_DIR:-/backup/monitoring}"
# Date stamp (YYYY-MM-DD) embedded in every backup file name; a second run on
# the same day therefore overwrites that day's files.
DATE=$(date +%Y-%m-%d)
RETENTION_DAYS="${RETENTION_DAYS:-30}" # Days to keep old backups (default: 30)
# ANSI colour escape sequences used by the log helpers below
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color
# Log helpers: print the first argument to stdout with a coloured severity tag.
log_info() { echo -e "${GREEN}[INFO]${NC} $1"; }
log_warn() { echo -e "${YELLOW}[WARN]${NC} $1"; }
log_error() { echo -e "${RED}[ERROR]${NC} $1"; }
# Create the backup directory (and any missing parents) before anything writes to it
mkdir -p "$BACKUP_DIR"
# ============================================
# Backup VictoriaMetrics
# ============================================
backup_victoriametrics() {
    # Creates a VictoriaMetrics snapshot through the HTTP API, archives it to
    # $BACKUP_DIR/victoriametrics-$DATE.tar.gz and removes the snapshot from
    # the server again.
    #
    # Environment overrides:
    #   VM_URL        base URL of VictoriaMetrics (default: http://localhost:8428)
    #   VM_DATA_PATH  host path of the VictoriaMetrics data volume
    #
    # Returns 1 when the snapshot cannot be created or archived. A snapshot
    # directory that is not visible from this host only produces a warning
    # (best effort, e.g. when the data lives inside a container).
    local vm_url="${VM_URL:-http://localhost:8428}"
    # Note: adjust the path (or set VM_DATA_PATH) to match your volume mount
    local vm_data_path="${VM_DATA_PATH:-/var/lib/docker/volumes/manacore-victoriametrics/_data}"
    local snapshot_response snapshot_name snapshot_path backup_file
    local status=0

    log_info "Creating VictoriaMetrics snapshot..."

    # Create snapshot via API. `|| true` keeps `set -e` from aborting the
    # script before the failure can be reported below.
    snapshot_response=$(curl -s -X POST "$vm_url/snapshot/create" || true)
    snapshot_name=$(echo "$snapshot_response" | grep -o '"snapshot":"[^"]*"' | cut -d'"' -f4 || true)

    if [ -z "$snapshot_name" ]; then
        log_error "Failed to create VictoriaMetrics snapshot"
        echo "$snapshot_response"
        return 1
    fi
    log_info "Snapshot created: $snapshot_name"

    snapshot_path="$vm_data_path/snapshots/$snapshot_name"
    if [ -d "$snapshot_path" ]; then
        backup_file="$BACKUP_DIR/victoriametrics-$DATE.tar.gz"
        log_info "Compressing snapshot to $backup_file..."
        # -h (dereference): the VictoriaMetrics docs require archiving
        # snapshots with a tool that follows symlinks; without it the archive
        # would contain the links instead of the data they point to.
        if tar -czhf "$backup_file" -C "$vm_data_path/snapshots" "$snapshot_name"; then
            log_info "VictoriaMetrics backup complete: $backup_file"
        else
            log_error "Failed to archive snapshot $snapshot_name"
            # Do not leave a truncated archive behind that looks like a valid backup
            rm -f "$backup_file"
            status=1
        fi
    else
        log_warn "Snapshot directory not found at $snapshot_path"
        log_warn "If using Docker, you may need to run this inside the container"
    fi

    # Always delete the snapshot, even when it could not be archived, so that
    # failed runs do not accumulate snapshots on the server.
    if curl -s -o /dev/null -X POST "$vm_url/snapshot/delete?snapshot=$snapshot_name"; then
        log_info "Snapshot deleted from VictoriaMetrics"
    else
        log_warn "Could not delete snapshot $snapshot_name from VictoriaMetrics"
    fi

    return "$status"
}
# ============================================
# Backup DuckDB
# ============================================
backup_duckdb() {
    # Backs up the DuckDB analytics database.
    #
    # Preferred path: copy the single database file to
    # $BACKUP_DIR/analytics-$DATE.duckdb and, when the duckdb CLI is installed,
    # additionally export the daily_metrics table to Parquet.
    # Fallback (database file not visible from this host): store JSON dumps
    # fetched from the analytics HTTP API.
    #
    # Environment overrides:
    #   ANALYTICS_API_URL  base URL of the analytics endpoints
    #                      (default: http://localhost:3001/api/v1/analytics)
    #
    # Returns 1 when the file copy or the API fallback fails; a failed Parquet
    # export only produces a warning because the .duckdb copy already exists.
    local duckdb_path="/var/lib/docker/volumes/manacore-analytics/_data/metrics.duckdb"
    local api_url="${ANALYTICS_API_URL:-http://localhost:3001/api/v1/analytics}"
    local backup_file parquet_file latest_file growth_file

    log_info "Backing up DuckDB analytics database..."

    if [ -f "$duckdb_path" ]; then
        # DuckDB is a single file, so we can just copy it
        backup_file="$BACKUP_DIR/analytics-$DATE.duckdb"
        if ! cp "$duckdb_path" "$backup_file"; then
            log_error "Failed to copy $duckdb_path to $backup_file"
            return 1
        fi
        log_info "DuckDB backup complete: $backup_file"

        # Also export to Parquet for long-term archival
        parquet_file="$BACKUP_DIR/analytics-$DATE.parquet"
        if command -v duckdb &> /dev/null; then
            # Export from the copy in read-only mode rather than from the live
            # file, which the analytics service keeps open while it is running.
            if duckdb -readonly "$backup_file" -c "COPY daily_metrics TO '$parquet_file' (FORMAT PARQUET)"; then
                log_info "Parquet export complete: $parquet_file"
            else
                log_warn "Parquet export failed, keeping only $backup_file"
                rm -f "$parquet_file"
            fi
        else
            log_warn "duckdb CLI not found, skipping Parquet export"
        fi
    else
        log_warn "DuckDB file not found at $duckdb_path"

        # Try alternative: backup via API
        log_info "Attempting backup via API..."
        latest_file="$BACKUP_DIR/analytics-latest-$DATE.json"
        growth_file="$BACKUP_DIR/analytics-growth-$DATE.json"
        # -f makes curl fail on HTTP errors instead of saving the error body
        # as if it were a backup.
        if curl -fsS "$api_url/latest" -o "$latest_file" &&
            curl -fsS "$api_url/growth?days=365" -o "$growth_file"; then
            log_info "API backup complete"
        else
            log_error "API backup failed (base URL: $api_url)"
            rm -f "$latest_file" "$growth_file"
            return 1
        fi
    fi
}
# ============================================
# Cleanup old backups
# ============================================
cleanup_old_backups() {
    # Deletes backup artefacts in $BACKUP_DIR whose modification time is more
    # than $RETENTION_DAYS days in the past.
    # Returns 1 (without deleting anything) when RETENTION_DAYS is not a
    # non-negative integer.
    case "$RETENTION_DAYS" in
        '' | *[!0-9]*)
            log_error "RETENTION_DAYS must be a non-negative integer, got '$RETENTION_DAYS'"
            return 1
            ;;
    esac

    log_info "Cleaning up backups older than $RETENTION_DAYS days..."
    # Only match the file names this script produces, so a misconfigured
    # BACKUP_DIR cannot cause unrelated files to be deleted.
    find "$BACKUP_DIR" -type f \
        \( -name 'victoriametrics-*.tar.gz' -o -name 'analytics-*' \) \
        -mtime "+$RETENTION_DAYS" -delete
    log_info "Cleanup complete"
}
# ============================================
# Main
# ============================================
main() {
    # Entry point: runs both backups, then prunes old backup files and lists
    # the backup directory.
    #
    # The backups are independent of each other, so a failure in one must not
    # prevent the other from running. Old backups are only pruned when every
    # backup of this run succeeded. Returns 1 if any backup step failed.
    local step status
    local failures=0

    log_info "Starting ManaCore Monitoring Backup"
    log_info "Backup directory: $BACKUP_DIR"
    log_info "Date: $DATE"
    echo ""

    for step in backup_victoriametrics backup_duckdb; do
        # Run the step in a subshell with errexit enabled inside it and
        # disabled outside: the step still stops at its first failing command,
        # but the failure no longer terminates the whole script.
        set +e
        (
            set -e
            "$step"
        )
        status=$?
        set -e

        if [ "$status" -ne 0 ]; then
            log_error "$step failed (exit code $status)"
            failures=$((failures + 1))
        fi
        echo ""
    done

    if [ "$failures" -eq 0 ]; then
        cleanup_old_backups
    else
        log_warn "Skipping cleanup of old backups because $failures backup step(s) failed"
    fi
    echo ""

    if [ "$failures" -eq 0 ]; then
        log_info "All backups complete!"
    else
        log_error "$failures backup step(s) failed"
    fi
    log_info "Files in $BACKUP_DIR:"
    ls -lh "$BACKUP_DIR"

    if [ "$failures" -ne 0 ]; then
        return 1
    fi
}
# Run main function
main "$@"

View file

@ -607,6 +607,30 @@ const APP_CONFIGS = [
PUBLIC_BACKEND_URL: (env) => `http://localhost:${env.TECHBASE_BACKEND_PORT || '3021'}`, PUBLIC_BACKEND_URL: (env) => `http://localhost:${env.TECHBASE_BACKEND_PORT || '3021'}`,
}, },
}, },
// Zitare Telegram Bot
{
path: 'services/telegram-zitare-bot/.env',
vars: {
NODE_ENV: () => 'development',
PORT: (env) => env.ZITARE_BOT_PORT || '3303',
TELEGRAM_BOT_TOKEN: (env) => env.ZITARE_BOT_TELEGRAM_TOKEN,
DATABASE_URL: (env) => env.ZITARE_BOT_DATABASE_URL,
},
},
// Todo Telegram Bot
{
path: 'services/telegram-todo-bot/.env',
vars: {
NODE_ENV: () => 'development',
PORT: (env) => env.TODO_BOT_PORT || '3304',
TELEGRAM_BOT_TOKEN: (env) => env.TODO_BOT_TELEGRAM_TOKEN,
DATABASE_URL: (env) => env.TODO_BOT_DATABASE_URL,
TODO_API_URL: (env) => env.TODO_BOT_API_URL || 'http://localhost:3018',
MANA_CORE_AUTH_URL: (env) => env.MANA_CORE_AUTH_URL,
},
},
]; ];
function main() { function main() {

View file

@ -75,6 +75,9 @@ ALL_DATABASES=(
"planta" "planta"
"nutriphi" "nutriphi"
"projectdoc" "projectdoc"
"zitare_bot"
"todo_bot"
"nutriphi_bot"
) )
# Check if specific service requested # Check if specific service requested
@ -160,9 +163,21 @@ setup_service() {
create_db_if_not_exists "projectdoc" create_db_if_not_exists "projectdoc"
push_schema "@manacore/telegram-project-doc-bot" "projectdoc" push_schema "@manacore/telegram-project-doc-bot" "projectdoc"
;; ;;
zitare_bot|zitare-bot)
create_db_if_not_exists "zitare_bot"
push_schema "@manacore/telegram-zitare-bot" "zitare-bot"
;;
todo_bot|todo-bot)
create_db_if_not_exists "todo_bot"
push_schema "@manacore/telegram-todo-bot" "todo-bot"
;;
nutriphi_bot|nutriphi-bot)
create_db_if_not_exists "nutriphi_bot"
push_schema "@manacore/telegram-nutriphi-bot" "nutriphi-bot"
;;
*) *)
echo -e "${RED}Unknown service: $service${NC}" echo -e "${RED}Unknown service: $service${NC}"
echo "Available services: auth, chat, zitare, contacts, calendar, clock, todo, manadeck, mail, moodlit, finance, voxel-lava, figgos, planta, nutriphi, presi, storage" echo "Available services: auth, chat, zitare, contacts, calendar, clock, todo, manadeck, mail, moodlit, finance, voxel-lava, figgos, planta, nutriphi, presi, storage, projectdoc, zitare_bot, todo_bot, nutriphi_bot"
exit 1 exit 1
;; ;;
esac esac

View file

@ -45,3 +45,6 @@ coverage/
.cache/ .cache/
tmp/ tmp/
temp/ temp/
# DuckDB local data
data/

View file

@ -50,7 +50,8 @@
"rxjs": "^7.8.1", "rxjs": "^7.8.1",
"stripe": "^17.5.0", "stripe": "^17.5.0",
"winston": "^3.17.0", "winston": "^3.17.0",
"zod": "^3.24.1" "zod": "^3.24.1",
"duckdb-async": "^1.1.1"
}, },
"devDependencies": { "devDependencies": {
"@nestjs/cli": "^11.0.0", "@nestjs/cli": "^11.0.0",

View file

@ -0,0 +1,135 @@
import { Controller, Get, Post, Query, Res, HttpStatus } from '@nestjs/common';
import { Response } from 'express';
import { AnalyticsService } from './analytics.service';
@Controller('analytics')
export class AnalyticsController {
  constructor(private readonly analyticsService: AnalyticsService) {}

  /**
   * Health check endpoint.
   *
   * @returns The health report produced by the analytics service.
   */
  @Get('health')
  async getHealth() {
    return this.analyticsService.getHealth();
  }

  /**
   * Get the latest metrics snapshot.
   *
   * @returns The most recent snapshot, or a message object when nothing has been recorded yet.
   */
  @Get('latest')
  async getLatest() {
    const metrics = await this.analyticsService.getLatestMetrics();
    if (!metrics) {
      return { message: 'No metrics recorded yet' };
    }
    return metrics;
  }

  /**
   * Get user growth data.
   *
   * @param days Number of days to look back (default: 90). Missing, non-numeric or
   *   negative values fall back to the default; values above the upper bound are capped.
   */
  @Get('growth')
  async getGrowth(@Query('days') days?: string) {
    return this.analyticsService.getUserGrowth(parseLookback(days, 90, MAX_LOOKBACK_DAYS));
  }

  /**
   * Get monthly aggregated metrics.
   *
   * @param months Number of months to look back (default: 12). Missing, non-numeric or
   *   negative values fall back to the default; values above the upper bound are capped.
   */
  @Get('monthly')
  async getMonthly(@Query('months') months?: string) {
    return this.analyticsService.getMonthlyMetrics(parseLookback(months, 12, MAX_LOOKBACK_MONTHS));
  }

  /**
   * Get metrics for a date range.
   *
   * @param start Start date (YYYY-MM-DD)
   * @param end End date (YYYY-MM-DD)
   * @returns The snapshots in the range, or an `{ error }` object when a date is
   *   missing or not in YYYY-MM-DD format.
   */
  @Get('range')
  async getRange(@Query('start') start: string, @Query('end') end: string) {
    // Reject malformed dates here so they never reach the database as a failing cast.
    if (!isIsoDate(start) || !isIsoDate(end)) {
      return { error: 'Both start and end dates are required (YYYY-MM-DD format)' };
    }
    return this.analyticsService.getMetricsRange(start, end);
  }

  /**
   * Trigger a manual snapshot (for testing/recovery).
   */
  @Post('snapshot')
  async triggerSnapshot() {
    await this.analyticsService.recordDailySnapshot();
    return { message: 'Snapshot recorded successfully' };
  }

  /**
   * Grafana JSON API compatible endpoint - query.
   *
   * Always answers with the `total_users` and `daily_growth` series of the last
   * 30 days as `[value, epochMillis]` datapoints.
   */
  @Post('grafana/query')
  async grafanaQuery(@Res() res: Response) {
    const growth = await this.analyticsService.getUserGrowth(30);

    // Respond through `res` to force 200 (the default status for @Post is 201).
    res.status(HttpStatus.OK).json([
      {
        target: 'total_users',
        datapoints: growth.map((g) => [g.total_users, new Date(g.date).getTime()]),
      },
      {
        target: 'daily_growth',
        datapoints: growth.map((g) => [g.growth ?? 0, new Date(g.date).getTime()]),
      },
    ]);
  }

  /**
   * Grafana JSON API compatible endpoint - search.
   *
   * @returns The names of the available metrics.
   */
  @Post('grafana/search')
  async grafanaSearch() {
    return [
      'total_users',
      'verified_users',
      'new_users_today',
      'new_users_week',
      'new_users_month',
      'daily_growth',
    ];
  }

  /**
   * Summary endpoint for dashboards: latest snapshot, month-over-month trend
   * and service health in a single response.
   */
  @Get('summary')
  async getSummary() {
    // The three lookups are independent of each other, so run them concurrently.
    const [latest, monthly, health] = await Promise.all([
      this.analyticsService.getLatestMetrics(),
      this.analyticsService.getMonthlyMetrics(2),
      this.analyticsService.getHealth(),
    ]);

    const currentMonth = monthly[monthly.length - 1];
    const previousMonth = monthly[monthly.length - 2];

    // A percentage is only defined against a non-zero baseline; report null
    // instead of dividing by zero.
    const monthOverMonthGrowth =
      currentMonth && previousMonth && previousMonth.total_users_eom > 0
        ? ((currentMonth.total_users_eom - previousMonth.total_users_eom) /
            previousMonth.total_users_eom) *
          100
        : null;

    return {
      current: latest,
      trends: {
        month_over_month_growth: monthOverMonthGrowth,
        new_users_this_month: currentMonth?.new_users ?? 0,
      },
      health,
    };
  }
}

/** Upper bound for the `days` query parameter (roughly ten years). */
const MAX_LOOKBACK_DAYS = 3650;

/** Upper bound for the `months` query parameter (ten years). */
const MAX_LOOKBACK_MONTHS = 120;

/**
 * Parses a look-back query parameter into a bounded, non-negative integer.
 *
 * @param raw Raw query value; anything that is not a string is treated as missing.
 * @param fallback Value used when `raw` is missing, not a number or negative.
 * @param max Largest value that is passed on.
 */
function parseLookback(raw: unknown, fallback: number, max: number): number {
  if (typeof raw !== 'string' || raw.trim() === '') {
    return fallback;
  }
  const parsed = Number.parseInt(raw, 10);
  if (!Number.isFinite(parsed) || parsed < 0) {
    return fallback;
  }
  return Math.min(parsed, max);
}

/** Returns true when `value` is a string in YYYY-MM-DD form. */
function isIsoDate(value: unknown): value is string {
  return typeof value === 'string' && /^\d{4}-\d{2}-\d{2}$/.test(value);
}

View file

@ -0,0 +1,12 @@
import { Module } from '@nestjs/common';
import { ScheduleModule } from '@nestjs/schedule';
import { AnalyticsService } from './analytics.service';
import { AnalyticsController } from './analytics.controller';
/**
 * Nest module for the analytics feature: registers the analytics HTTP
 * controller and provides (and exports) AnalyticsService.
 *
 * NOTE(review): ScheduleModule.forRoot() is registered here rather than in the
 * root module - confirm that no other module registers it as well.
 */
@Module({
imports: [ScheduleModule.forRoot()],
controllers: [AnalyticsController],
providers: [AnalyticsService],
exports: [AnalyticsService],
})
export class AnalyticsModule {}

View file

@ -0,0 +1,327 @@
import { Injectable, Logger, OnModuleInit, OnModuleDestroy } from '@nestjs/common';
import { ConfigService } from '@nestjs/config';
import { Cron, CronExpression } from '@nestjs/schedule';
import { Database } from 'duckdb-async';
import { sql } from 'drizzle-orm';
import { getDb } from '../db/connection';
import * as fs from 'fs';
import * as path from 'path';
/**
 * One row of the `daily_metrics` table: the user statistics recorded for a
 * single day.
 */
export interface DailyMetrics {
/** Day the snapshot belongs to (primary key of the table), as text. */
date: string;
/** Number of rows in `auth.users` at snapshot time. */
total_users: number;
/** Number of users with `email_verified = true` at snapshot time. */
verified_users: number;
/** Users created within the 1 day before the snapshot (trailing window, not a calendar day). */
new_users_today: number;
/** Users created within the 7 days before the snapshot. */
new_users_week: number;
/** Users created within the 30 days before the snapshot. */
new_users_month: number;
/** Size of the PostgreSQL database in bytes, or null when it could not be determined. */
total_db_size_bytes: number | null;
/** Timestamp at which the row was written, as text. */
recorded_at: string;
}
/**
 * One data point of the user growth series returned by
 * `AnalyticsService.getUserGrowth`.
 */
export interface GrowthData {
/** Day of the snapshot, as text. */
date: string;
/** Total number of users recorded for that day. */
total_users: number;
/** Difference to the previous snapshot in the series; null for the first row. */
growth: number | null;
/**
 * `growth` relative to the previous snapshot in percent, rounded to two
 * decimals; null for the first row or when the previous total was 0.
 */
growth_percent: number | null;
}
/**
 * Per-month aggregate of the daily snapshots returned by
 * `AnalyticsService.getMonthlyMetrics`.
 */
export interface MonthlyMetrics {
/** Month in `YYYY-MM` form. */
month: string;
/** Highest `total_users` value recorded in the month (used as the end-of-month total). */
total_users_eom: number;
/** Sum of `new_users_today` over all snapshots of the month. */
new_users: number;
/**
 * Increase from the lowest to the highest `total_users` value of the month in
 * percent, rounded to two decimals; null when the lowest value is 0.
 */
growth_percent: number | null;
}
/**
 * Long-term business analytics backed by a local DuckDB file.
 *
 * Once a day the service counts users in PostgreSQL and stores the result as
 * one row in the DuckDB table `daily_metrics`; the query methods read from
 * that table only.
 *
 * Initialization is best effort: when DuckDB cannot be opened the service
 * stays up, the query methods return empty results and `getHealth` reports
 * `unhealthy`.
 */
@Injectable()
export class AnalyticsService implements OnModuleInit, OnModuleDestroy {
  /**
   * Column list shared by all queries that return `DailyMetrics` rows.
   *
   * `total_db_size_bytes` is stored as BIGINT. It is cast to DOUBLE so it
   * arrives as a plain JS number; a cast to the 32-bit INTEGER type would fail
   * for databases larger than 2 GiB.
   */
  private static readonly DAILY_METRICS_COLUMNS = `
        date::VARCHAR as date,
        total_users,
        verified_users,
        new_users_today,
        new_users_week,
        new_users_month,
        total_db_size_bytes::DOUBLE as total_db_size_bytes,
        recorded_at::VARCHAR as recorded_at`;

  private readonly logger = new Logger(AnalyticsService.name);
  private duckdb: Database | null = null;
  private readonly dbPath: string;
  private readonly databaseUrl: string;

  constructor(private readonly configService: ConfigService) {
    this.dbPath = this.configService.get<string>('DUCKDB_PATH', './data/metrics.duckdb');
    this.databaseUrl = this.configService.get<string>('DATABASE_URL', '');
  }

  /**
   * Opens (or creates) the DuckDB file, ensures the schema exists and records
   * a first snapshot when the table is still empty. Failures are logged and
   * not rethrown so that the application still starts.
   */
  async onModuleInit(): Promise<void> {
    try {
      // Ensure the directory exists
      const dbDir = path.dirname(this.dbPath);
      if (!fs.existsSync(dbDir)) {
        fs.mkdirSync(dbDir, { recursive: true });
        this.logger.log(`Created DuckDB directory: ${dbDir}`);
      }

      this.duckdb = await Database.create(this.dbPath);
      await this.initializeSchema();
      this.logger.log(`DuckDB initialized at ${this.dbPath}`);

      // Record initial snapshot if database is empty
      const count = await this.getRecordCount();
      if (count === 0) {
        this.logger.log('No existing records found, recording initial snapshot...');
        await this.recordDailySnapshot();
      }
    } catch (error) {
      this.logger.error('Failed to initialize DuckDB', error);
    }
  }

  /** Closes the DuckDB connection on shutdown. */
  async onModuleDestroy(): Promise<void> {
    if (this.duckdb) {
      await this.duckdb.close();
      // Drop the handle so later calls take the "not initialized" path
      // instead of using a closed connection.
      this.duckdb = null;
      this.logger.log('DuckDB connection closed');
    }
  }

  /** Creates the `daily_metrics` table if it does not exist yet. */
  private async initializeSchema(): Promise<void> {
    if (!this.duckdb) return;

    await this.duckdb.run(`
      CREATE TABLE IF NOT EXISTS daily_metrics (
        date DATE PRIMARY KEY,
        total_users INTEGER NOT NULL,
        verified_users INTEGER NOT NULL,
        new_users_today INTEGER NOT NULL,
        new_users_week INTEGER NOT NULL,
        new_users_month INTEGER NOT NULL,
        total_db_size_bytes BIGINT,
        recorded_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
      )
    `);
    this.logger.log('DuckDB schema initialized');
  }

  /** Returns the number of rows in `daily_metrics` (0 when DuckDB is unavailable). */
  private async getRecordCount(): Promise<number> {
    if (!this.duckdb) return 0;

    const result = await this.duckdb.all('SELECT COUNT(*) as count FROM daily_metrics');
    return Number(result[0]?.count ?? 0);
  }

  /**
   * Record the daily snapshot - runs at midnight UTC.
   *
   * The cron job is pinned to UTC because the snapshot date below is derived
   * from the UTC date; with a server-local schedule the row could be filed
   * under the wrong day. A second run on the same day replaces that day's row.
   * Errors are logged and not rethrown.
   */
  @Cron(CronExpression.EVERY_DAY_AT_MIDNIGHT, { timeZone: 'UTC' })
  async recordDailySnapshot(): Promise<void> {
    if (!this.duckdb) {
      this.logger.warn('DuckDB not initialized, skipping snapshot');
      return;
    }

    try {
      // UTC date in YYYY-MM-DD form
      const today = new Date().toISOString().slice(0, 10);

      // Get user counts from PostgreSQL
      const [totalUsers, verifiedUsers, newToday, newWeek, newMonth, dbSize] = await Promise.all([
        this.countTotalUsers(),
        this.countVerifiedUsers(),
        this.countUsersCreatedSince(1),
        this.countUsersCreatedSince(7),
        this.countUsersCreatedSince(30),
        this.getDatabaseSize(),
      ]);

      // Insert or replace in DuckDB
      await this.duckdb.run(
        `
        INSERT OR REPLACE INTO daily_metrics
        (date, total_users, verified_users, new_users_today, new_users_week, new_users_month, total_db_size_bytes, recorded_at)
        VALUES (?, ?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP)
      `,
        today,
        totalUsers,
        verifiedUsers,
        newToday,
        newWeek,
        newMonth,
        dbSize
      );

      this.logger.log(`Daily snapshot recorded for ${today}: ${totalUsers} total users`);
    } catch (error) {
      this.logger.error('Failed to record daily snapshot', error);
    }
  }

  /**
   * Get user growth over time.
   *
   * @param days Number of days to look back. Non-finite or negative values
   *   fall back to 90; fractional values are rounded down.
   * @returns One entry per recorded day, oldest first; empty when DuckDB is unavailable.
   */
  async getUserGrowth(days: number = 90): Promise<GrowthData[]> {
    if (!this.duckdb) return [];

    // Only a validated integer is interpolated into the statement.
    const lookbackDays = AnalyticsService.toLookback(days, 90);

    const result = await this.duckdb.all(
      `
      SELECT
        date::VARCHAR as date,
        total_users,
        total_users - LAG(total_users) OVER (ORDER BY date) as growth,
        ROUND(((total_users::FLOAT - LAG(total_users) OVER (ORDER BY date)) /
          NULLIF(LAG(total_users) OVER (ORDER BY date), 0)) * 100, 2) as growth_percent
      FROM daily_metrics
      WHERE date > CURRENT_DATE - INTERVAL '${lookbackDays} days'
      ORDER BY date
    `
    );

    return result as GrowthData[];
  }

  /**
   * Get monthly aggregated metrics.
   *
   * @param months Number of months to look back. Non-finite or negative values
   *   fall back to 12; fractional values are rounded down.
   * @returns One entry per month, oldest first; empty when DuckDB is unavailable.
   */
  async getMonthlyMetrics(months: number = 12): Promise<MonthlyMetrics[]> {
    if (!this.duckdb) return [];

    // Only a validated integer is interpolated into the statement.
    const lookbackMonths = AnalyticsService.toLookback(months, 12);

    const result = await this.duckdb.all(
      `
      SELECT
        strftime(date_trunc('month', date), '%Y-%m') as month,
        MAX(total_users)::INTEGER as total_users_eom,
        SUM(new_users_today)::INTEGER as new_users,
        ROUND(((MAX(total_users)::FLOAT - MIN(total_users)) /
          NULLIF(MIN(total_users), 0)) * 100, 2) as growth_percent
      FROM daily_metrics
      WHERE date > CURRENT_DATE - INTERVAL '${lookbackMonths} months'
      GROUP BY date_trunc('month', date)
      ORDER BY month
    `
    );

    return result as MonthlyMetrics[];
  }

  /**
   * Get the latest metrics.
   *
   * @returns The most recent snapshot, or null when nothing has been recorded
   *   or DuckDB is unavailable.
   */
  async getLatestMetrics(): Promise<DailyMetrics | null> {
    if (!this.duckdb) return null;

    const result = await this.duckdb.all(`
      SELECT ${AnalyticsService.DAILY_METRICS_COLUMNS}
      FROM daily_metrics
      ORDER BY date DESC
      LIMIT 1
    `);

    return (result[0] as DailyMetrics) ?? null;
  }

  /**
   * Get all metrics for a date range.
   *
   * @param startDate First day of the range (inclusive)
   * @param endDate Last day of the range (inclusive)
   * @returns The snapshots in the range, oldest first; empty when DuckDB is unavailable.
   */
  async getMetricsRange(startDate: string, endDate: string): Promise<DailyMetrics[]> {
    if (!this.duckdb) return [];

    const result = await this.duckdb.all(
      `
      SELECT ${AnalyticsService.DAILY_METRICS_COLUMNS}
      FROM daily_metrics
      WHERE date BETWEEN ? AND ?
      ORDER BY date
    `,
      startDate,
      endDate
    );

    return result as DailyMetrics[];
  }

  /**
   * Health check for the analytics service.
   *
   * @returns Status (`healthy` while a DuckDB connection is open), the
   *   database path, the size of the database file (null when it cannot be
   *   read), the number of stored snapshots and the date of the latest one.
   */
  async getHealth(): Promise<{
    status: string;
    database_path: string;
    database_size_bytes: number | null;
    total_records: number;
    latest_snapshot: string | null;
  }> {
    const [recordCount, latest, fileSize] = await Promise.all([
      this.getRecordCount(),
      this.getLatestMetrics(),
      this.getDuckDbFileSize(),
    ]);

    return {
      status: this.duckdb ? 'healthy' : 'unhealthy',
      database_path: this.dbPath,
      database_size_bytes: fileSize,
      total_records: recordCount,
      latest_snapshot: latest?.date ?? null,
    };
  }

  /**
   * Export metrics to Parquet format (for archival).
   *
   * @param outputPath Destination file for the Parquet export
   * @throws Error when DuckDB is not initialized
   */
  async exportToParquet(outputPath: string): Promise<void> {
    if (!this.duckdb) {
      throw new Error('DuckDB not initialized');
    }

    // The path is embedded in a SQL string literal, so single quotes must be doubled.
    const quotedPath = outputPath.replace(/'/g, "''");
    await this.duckdb.run(`COPY daily_metrics TO '${quotedPath}' (FORMAT PARQUET)`);
    this.logger.log(`Metrics exported to ${outputPath}`);
  }

  /**
   * Normalizes a look-back amount to a non-negative integer.
   *
   * @param value Requested amount
   * @param fallback Value used when `value` is not finite or negative
   */
  private static toLookback(value: number, fallback: number): number {
    return Number.isFinite(value) && value >= 0 ? Math.floor(value) : fallback;
  }

  /** Size of the DuckDB file on disk in bytes, or null when it cannot be read. */
  private async getDuckDbFileSize(): Promise<number | null> {
    try {
      const stats = await fs.promises.stat(this.dbPath);
      return stats.size;
    } catch {
      return null;
    }
  }

  // ============================================
  // PostgreSQL Query Helpers
  // ============================================

  /**
   * Returns the PostgreSQL handle for the configured `DATABASE_URL`.
   *
   * @throws Error when `DATABASE_URL` is not configured
   */
  private getPostgresDb() {
    if (!this.databaseUrl) {
      throw new Error('DATABASE_URL not configured');
    }
    return getDb(this.databaseUrl);
  }

  /** Counts all rows in `auth.users`. */
  private async countTotalUsers(): Promise<number> {
    const db = this.getPostgresDb();
    const result = await db.execute(sql`SELECT COUNT(*) as count FROM auth.users`);
    const row = result[0] as { count: string | number } | undefined;
    return Number(row?.count ?? 0);
  }

  /** Counts the users whose e-mail address is verified. */
  private async countVerifiedUsers(): Promise<number> {
    const db = this.getPostgresDb();
    const result = await db.execute(
      sql`SELECT COUNT(*) as count FROM auth.users WHERE email_verified = true`
    );
    const row = result[0] as { count: string | number } | undefined;
    return Number(row?.count ?? 0);
  }

  /**
   * Counts the users created within the last `days` days.
   *
   * @param days Size of the trailing window in days; must be a non-negative integer
   * @throws RangeError when `days` is not a non-negative integer
   */
  private async countUsersCreatedSince(days: number): Promise<number> {
    // `days` is spliced into the statement with sql.raw, so it must be a
    // plain integer.
    if (!Number.isInteger(days) || days < 0) {
      throw new RangeError(`days must be a non-negative integer, got ${days}`);
    }

    const db = this.getPostgresDb();
    const result = await db.execute(
      sql`SELECT COUNT(*) as count FROM auth.users WHERE created_at > NOW() - INTERVAL '${sql.raw(days.toString())} days'`
    );
    const row = result[0] as { count: string | number } | undefined;
    return Number(row?.count ?? 0);
  }

  /** Size of the current PostgreSQL database in bytes, or null when the query fails. */
  private async getDatabaseSize(): Promise<number | null> {
    try {
      const db = this.getPostgresDb();
      const result = await db.execute(sql`SELECT pg_database_size(current_database()) as size`);
      const row = result[0] as { size: string | number } | undefined;
      return Number(row?.size ?? 0);
    } catch {
      return null;
    }
  }
}

View file

@ -0,0 +1,3 @@
export * from './analytics.module';
export * from './analytics.service';
export * from './analytics.controller';

View file

@ -12,6 +12,7 @@ import { TagsModule } from './tags/tags.module';
import { AiModule } from './ai/ai.module'; import { AiModule } from './ai/ai.module';
import { HealthModule } from './health/health.module'; import { HealthModule } from './health/health.module';
import { MetricsModule } from './metrics'; import { MetricsModule } from './metrics';
import { AnalyticsModule } from './analytics';
import { HttpExceptionFilter } from './common/filters/http-exception.filter'; import { HttpExceptionFilter } from './common/filters/http-exception.filter';
@Module({ @Module({
@ -27,6 +28,7 @@ import { HttpExceptionFilter } from './common/filters/http-exception.filter';
}, },
]), ]),
MetricsModule, MetricsModule,
AnalyticsModule,
AiModule, AiModule,
AuthModule, AuthModule,
CreditsModule, CreditsModule,