mirror of
https://github.com/Memo-2023/mana-monorepo.git
synced 2026-05-14 20:21:09 +02:00
feat(infra): add deploy tracking with PostgreSQL, Pushgateway & Grafana dashboard
Instrument the CD pipeline to record per-deploy and per-service metrics (build time, image size, startup time, health status) into PostgreSQL and push gauges to Pushgateway. Adds a Grafana dashboard with 13 panels covering deploy frequency, build performance, service health, and history. New files: - scripts/mac-mini/init-deploy-tracking.sql (idempotent DDL) - scripts/deploy-metrics.sh (bash library for CI) - docker/grafana/provisioning/datasources/deploy-tracking.yml - docker/grafana/dashboards/deploy-tracking.json Modified: - docker/prometheus/prometheus.yml (pushgateway scrape job) - .github/workflows/cd-macmini.yml (build/health instrumentation) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
a5940abfc2
commit
3f91c4656a
6 changed files with 933 additions and 38 deletions
265
.github/workflows/cd-macmini.yml
vendored
265
.github/workflows/cd-macmini.yml
vendored
|
|
@ -152,6 +152,15 @@ jobs:
|
||||||
cd "${{ env.PROJECT_DIR }}"
|
cd "${{ env.PROJECT_DIR }}"
|
||||||
git pull origin main
|
git pull origin main
|
||||||
|
|
||||||
|
- name: Init deploy tracking
|
||||||
|
id: init
|
||||||
|
run: |
|
||||||
|
cd "${{ env.PROJECT_DIR }}"
|
||||||
|
source scripts/deploy-metrics.sh
|
||||||
|
deploy_timer_start
|
||||||
|
echo "start_epoch=$DEPLOY_START_EPOCH" >> $GITHUB_OUTPUT
|
||||||
|
ensure_deploy_schema
|
||||||
|
|
||||||
- name: Ensure env vars exist
|
- name: Ensure env vars exist
|
||||||
run: |
|
run: |
|
||||||
cd "${{ env.PROJECT_DIR }}"
|
cd "${{ env.PROJECT_DIR }}"
|
||||||
|
|
@ -197,61 +206,241 @@ jobs:
|
||||||
echo "deploy-all=false" >> $GITHUB_OUTPUT
|
echo "deploy-all=false" >> $GITHUB_OUTPUT
|
||||||
echo "Services to deploy: $SERVICES"
|
echo "Services to deploy: $SERVICES"
|
||||||
|
|
||||||
- name: Deploy all services
|
- name: Build and deploy services
|
||||||
if: steps.services.outputs.deploy-all == 'true'
|
id: build
|
||||||
run: |
|
run: |
|
||||||
cd "${{ env.PROJECT_DIR }}"
|
cd "${{ env.PROJECT_DIR }}"
|
||||||
echo "=== Rebuilding and restarting ALL services ==="
|
source scripts/deploy-metrics.sh
|
||||||
docker compose -f "${{ env.COMPOSE_FILE }}" --env-file "${{ env.ENV_FILE }}" up -d --build
|
|
||||||
echo "=== Waiting for services to start ==="
|
|
||||||
sleep 15
|
|
||||||
docker compose -f "${{ env.COMPOSE_FILE }}" ps
|
|
||||||
|
|
||||||
- name: Deploy changed services
|
DEPLOY_ALL="${{ steps.services.outputs.deploy-all }}"
|
||||||
if: steps.services.outputs.deploy-all == 'false' && steps.services.outputs.services != ''
|
|
||||||
run: |
|
|
||||||
cd "${{ env.PROJECT_DIR }}"
|
|
||||||
SERVICES="${{ steps.services.outputs.services }}"
|
SERVICES="${{ steps.services.outputs.services }}"
|
||||||
echo "=== Rebuilding: $SERVICES ==="
|
|
||||||
docker compose -f "${{ env.COMPOSE_FILE }}" --env-file "${{ env.ENV_FILE }}" up -d --build --no-deps $SERVICES
|
# Determine final service list
|
||||||
|
if [ "$DEPLOY_ALL" == "true" ]; then
|
||||||
|
# Get all service names from compose file
|
||||||
|
SERVICES=$(docker compose -f "$COMPOSE_FILE" --env-file "$ENV_FILE" config --services | tr '\n' ' ')
|
||||||
|
echo "=== Rebuilding ALL services ==="
|
||||||
|
elif [ -z "$SERVICES" ]; then
|
||||||
|
echo "No services to deploy"
|
||||||
|
echo "build-times=" >> $GITHUB_OUTPUT
|
||||||
|
exit 0
|
||||||
|
else
|
||||||
|
echo "=== Rebuilding: $SERVICES ==="
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Build each service individually to capture build times
|
||||||
|
BUILD_TIMES=""
|
||||||
|
for svc in $SERVICES; do
|
||||||
|
echo "--- Building $svc ---"
|
||||||
|
build_start=$(date +%s)
|
||||||
|
docker compose -f "$COMPOSE_FILE" --env-file "$ENV_FILE" build "$svc" 2>&1 || true
|
||||||
|
build_end=$(date +%s)
|
||||||
|
build_dur=$(( build_end - build_start ))
|
||||||
|
BUILD_TIMES="$BUILD_TIMES $svc:$build_dur"
|
||||||
|
echo " $svc built in ${build_dur}s"
|
||||||
|
done
|
||||||
|
|
||||||
|
# Start all services at once (no rebuild, images already built)
|
||||||
|
echo "=== Starting services ==="
|
||||||
|
if [ "$DEPLOY_ALL" == "true" ]; then
|
||||||
|
docker compose -f "$COMPOSE_FILE" --env-file "$ENV_FILE" up -d
|
||||||
|
else
|
||||||
|
docker compose -f "$COMPOSE_FILE" --env-file "$ENV_FILE" up -d --no-deps $SERVICES
|
||||||
|
fi
|
||||||
echo "=== Waiting for services to start ==="
|
echo "=== Waiting for services to start ==="
|
||||||
sleep 10
|
sleep 10
|
||||||
|
|
||||||
|
echo "build-times=$BUILD_TIMES" >> $GITHUB_OUTPUT
|
||||||
|
|
||||||
- name: Health checks
|
- name: Health checks
|
||||||
|
id: health
|
||||||
|
run: |
|
||||||
|
cd "${{ env.PROJECT_DIR }}"
|
||||||
|
source scripts/deploy-metrics.sh
|
||||||
|
|
||||||
|
# Map of service -> health URL
|
||||||
|
declare -A HEALTH_URLS=(
|
||||||
|
["mana-auth"]="http://localhost:3001/health"
|
||||||
|
["matrix-web"]="http://localhost:5180/health"
|
||||||
|
["chat-backend"]="http://localhost:3030/health"
|
||||||
|
["chat-web"]="http://localhost:5010/health"
|
||||||
|
["todo-backend"]="http://localhost:3031/health"
|
||||||
|
["todo-web"]="http://localhost:5011/health"
|
||||||
|
["calendar-backend"]="http://localhost:3032/health"
|
||||||
|
["calendar-web"]="http://localhost:5012/health"
|
||||||
|
["clock-backend"]="http://localhost:3033/health"
|
||||||
|
["clock-web"]="http://localhost:5013/health"
|
||||||
|
["contacts-backend"]="http://localhost:3034/health"
|
||||||
|
["contacts-web"]="http://localhost:5014/health"
|
||||||
|
)
|
||||||
|
|
||||||
|
DEPLOY_ALL="${{ steps.services.outputs.deploy-all }}"
|
||||||
|
SERVICES="${{ steps.services.outputs.services }}"
|
||||||
|
|
||||||
|
HEALTH_RESULTS=""
|
||||||
|
echo "=== Health Checks ==="
|
||||||
|
|
||||||
|
# Check all known health endpoints
|
||||||
|
for svc in "${!HEALTH_URLS[@]}"; do
|
||||||
|
url="${HEALTH_URLS[$svc]}"
|
||||||
|
result=$(check_health_timed "$svc" "$url" 2>/dev/null) || true
|
||||||
|
status=$(echo "$result" | awk '{print $1}')
|
||||||
|
elapsed=$(echo "$result" | awk '{print $2}')
|
||||||
|
http_code=$(echo "$result" | awk '{print $3}')
|
||||||
|
|
||||||
|
if [ -z "$status" ]; then
|
||||||
|
status="skipped"
|
||||||
|
elapsed="0"
|
||||||
|
http_code="0"
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ "$status" = "ok" ]; then
|
||||||
|
echo " ✓ $svc: OK (${elapsed}s)"
|
||||||
|
else
|
||||||
|
echo " ✗ $svc: $status (HTTP $http_code, ${elapsed}s)"
|
||||||
|
fi
|
||||||
|
|
||||||
|
HEALTH_RESULTS="$HEALTH_RESULTS $svc:$status:$http_code:$elapsed"
|
||||||
|
done
|
||||||
|
|
||||||
|
echo "health-results=$HEALTH_RESULTS" >> $GITHUB_OUTPUT
|
||||||
|
|
||||||
|
- name: Record deploy metrics
|
||||||
|
if: always()
|
||||||
|
run: |
|
||||||
|
cd "${{ env.PROJECT_DIR }}"
|
||||||
|
source scripts/deploy-metrics.sh
|
||||||
|
|
||||||
|
START_EPOCH="${{ steps.init.outputs.start_epoch }}"
|
||||||
|
NOW=$(date +%s)
|
||||||
|
DURATION=$(( NOW - START_EPOCH ))
|
||||||
|
|
||||||
|
# Determine overall status
|
||||||
|
STATUS="success"
|
||||||
|
if [ "${{ job.status }}" != "success" ]; then
|
||||||
|
STATUS="failure"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Determine services list
|
||||||
|
DEPLOY_ALL="${{ steps.services.outputs.deploy-all }}"
|
||||||
|
SERVICES="${{ steps.services.outputs.services }}"
|
||||||
|
if [ "$DEPLOY_ALL" == "true" ]; then
|
||||||
|
SERVICES_CSV="all"
|
||||||
|
else
|
||||||
|
SERVICES_CSV=$(echo "$SERVICES" | tr ' ' ',')
|
||||||
|
fi
|
||||||
|
|
||||||
|
COMMIT_MSG=$(git log -1 --pretty=%s 2>/dev/null | head -c 200 || echo "unknown")
|
||||||
|
BRANCH="${{ github.ref_name }}"
|
||||||
|
|
||||||
|
# Insert deployment row
|
||||||
|
DEPLOY_ID=$(insert_deployment \
|
||||||
|
"${{ github.run_id }}" \
|
||||||
|
"${{ github.run_attempt }}" \
|
||||||
|
"${{ github.sha }}" \
|
||||||
|
"$COMMIT_MSG" \
|
||||||
|
"$BRANCH" \
|
||||||
|
"${{ github.event_name }}" \
|
||||||
|
"${{ github.actor }}" \
|
||||||
|
"$SERVICES_CSV" \
|
||||||
|
"$STATUS" 2>/dev/null) || DEPLOY_ID=""
|
||||||
|
|
||||||
|
if [ -n "$DEPLOY_ID" ]; then
|
||||||
|
# Finalise with duration
|
||||||
|
finalise_deployment "$DEPLOY_ID" "$STATUS" "$DURATION" 2>/dev/null || true
|
||||||
|
|
||||||
|
# Parse build times: "svc1:42 svc2:31"
|
||||||
|
BUILD_TIMES="${{ steps.build.outputs.build-times }}"
|
||||||
|
declare -A BUILD_DUR_MAP
|
||||||
|
for entry in $BUILD_TIMES; do
|
||||||
|
svc="${entry%%:*}"
|
||||||
|
dur="${entry#*:}"
|
||||||
|
BUILD_DUR_MAP["$svc"]="$dur"
|
||||||
|
done
|
||||||
|
|
||||||
|
# Parse health results: "svc1:ok:200:5.0 svc2:failed:503:30.0"
|
||||||
|
HEALTH_RESULTS="${{ steps.health.outputs.health-results }}"
|
||||||
|
declare -A HEALTH_MAP HTTP_MAP STARTUP_MAP
|
||||||
|
for entry in $HEALTH_RESULTS; do
|
||||||
|
svc=$(echo "$entry" | cut -d: -f1)
|
||||||
|
h_status=$(echo "$entry" | cut -d: -f2)
|
||||||
|
h_code=$(echo "$entry" | cut -d: -f3)
|
||||||
|
h_time=$(echo "$entry" | cut -d: -f4)
|
||||||
|
HEALTH_MAP["$svc"]="$h_status"
|
||||||
|
HTTP_MAP["$svc"]="$h_code"
|
||||||
|
STARTUP_MAP["$svc"]="$h_time"
|
||||||
|
done
|
||||||
|
|
||||||
|
# Combine: for each service that was built or health-checked, insert a row
|
||||||
|
ALL_SVCS=$(echo "$BUILD_TIMES $HEALTH_RESULTS" | tr ' ' '\n' | cut -d: -f1 | sort -u | tr '\n' ' ')
|
||||||
|
for svc in $ALL_SVCS; do
|
||||||
|
[ -z "$svc" ] && continue
|
||||||
|
build_dur="${BUILD_DUR_MAP[$svc]:-0}"
|
||||||
|
img_mb=$(get_image_size_mb "$svc" 2>/dev/null || echo "0")
|
||||||
|
startup="${STARTUP_MAP[$svc]:-0}"
|
||||||
|
health="${HEALTH_MAP[$svc]:-skipped}"
|
||||||
|
http_code="${HTTP_MAP[$svc]:-0}"
|
||||||
|
|
||||||
|
insert_deploy_service "$DEPLOY_ID" "$svc" "$build_dur" "$img_mb" "$startup" "$health" "$http_code" 2>/dev/null || true
|
||||||
|
push_service_metrics "$svc" "$build_dur" "$img_mb" "$health" 2>/dev/null || true
|
||||||
|
done
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Push overall metrics to Pushgateway
|
||||||
|
push_deploy_metrics "$STATUS" "$DURATION" "$BRANCH" 2>/dev/null || true
|
||||||
|
echo "Deploy tracking recorded: status=$STATUS duration=${DURATION}s"
|
||||||
|
|
||||||
|
- name: Summary
|
||||||
|
if: always()
|
||||||
run: |
|
run: |
|
||||||
cd "${{ env.PROJECT_DIR }}"
|
cd "${{ env.PROJECT_DIR }}"
|
||||||
|
|
||||||
check_health() {
|
START_EPOCH="${{ steps.init.outputs.start_epoch }}"
|
||||||
local name=$1
|
NOW=$(date +%s)
|
||||||
local url=$2
|
DURATION=$(( NOW - START_EPOCH ))
|
||||||
local status=$(curl -s -o /dev/null -w "%{http_code}" --max-time 5 "$url" 2>/dev/null || echo "000")
|
|
||||||
if [ "$status" == "200" ]; then
|
|
||||||
echo " ✓ $name: OK"
|
|
||||||
else
|
|
||||||
echo " ✗ $name: FAILED (HTTP $status)"
|
|
||||||
fi
|
|
||||||
}
|
|
||||||
|
|
||||||
echo "=== Health Checks ==="
|
|
||||||
check_health "Auth API" "http://localhost:3001/health"
|
|
||||||
check_health "Matrix Web" "http://localhost:5180/health"
|
|
||||||
check_health "Chat Backend" "http://localhost:3030/health"
|
|
||||||
check_health "Chat Web" "http://localhost:5010/health"
|
|
||||||
check_health "Todo Backend" "http://localhost:3031/health"
|
|
||||||
check_health "Todo Web" "http://localhost:5011/health"
|
|
||||||
check_health "Calendar Backend" "http://localhost:3032/health"
|
|
||||||
check_health "Calendar Web" "http://localhost:5012/health"
|
|
||||||
check_health "Clock Backend" "http://localhost:3033/health"
|
|
||||||
check_health "Clock Web" "http://localhost:5013/health"
|
|
||||||
|
|
||||||
- name: Summary
|
|
||||||
run: |
|
|
||||||
echo "## Deployment Summary" >> $GITHUB_STEP_SUMMARY
|
echo "## Deployment Summary" >> $GITHUB_STEP_SUMMARY
|
||||||
echo "" >> $GITHUB_STEP_SUMMARY
|
echo "" >> $GITHUB_STEP_SUMMARY
|
||||||
echo "**Trigger:** ${{ github.event_name }}" >> $GITHUB_STEP_SUMMARY
|
echo "**Trigger:** ${{ github.event_name }}" >> $GITHUB_STEP_SUMMARY
|
||||||
echo "**Commit:** ${{ github.sha }}" >> $GITHUB_STEP_SUMMARY
|
echo "**Commit:** \`${{ github.sha }}\`" >> $GITHUB_STEP_SUMMARY
|
||||||
|
echo "**Duration:** ${DURATION}s" >> $GITHUB_STEP_SUMMARY
|
||||||
|
echo "**Status:** ${{ job.status }}" >> $GITHUB_STEP_SUMMARY
|
||||||
|
echo "" >> $GITHUB_STEP_SUMMARY
|
||||||
|
|
||||||
if [ "${{ steps.services.outputs.deploy-all }}" == "true" ]; then
|
if [ "${{ steps.services.outputs.deploy-all }}" == "true" ]; then
|
||||||
echo "**Services:** All" >> $GITHUB_STEP_SUMMARY
|
echo "**Services:** All" >> $GITHUB_STEP_SUMMARY
|
||||||
else
|
else
|
||||||
echo "**Services:** ${{ steps.services.outputs.services }}" >> $GITHUB_STEP_SUMMARY
|
echo "**Services:** ${{ steps.services.outputs.services }}" >> $GITHUB_STEP_SUMMARY
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# Build times table
|
||||||
|
BUILD_TIMES="${{ steps.build.outputs.build-times }}"
|
||||||
|
if [ -n "$BUILD_TIMES" ]; then
|
||||||
|
echo "" >> $GITHUB_STEP_SUMMARY
|
||||||
|
echo "### Build Times" >> $GITHUB_STEP_SUMMARY
|
||||||
|
echo "| Service | Duration |" >> $GITHUB_STEP_SUMMARY
|
||||||
|
echo "|---------|----------|" >> $GITHUB_STEP_SUMMARY
|
||||||
|
for entry in $BUILD_TIMES; do
|
||||||
|
svc="${entry%%:*}"
|
||||||
|
dur="${entry#*:}"
|
||||||
|
echo "| $svc | ${dur}s |" >> $GITHUB_STEP_SUMMARY
|
||||||
|
done
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Health results table
|
||||||
|
HEALTH_RESULTS="${{ steps.health.outputs.health-results }}"
|
||||||
|
if [ -n "$HEALTH_RESULTS" ]; then
|
||||||
|
echo "" >> $GITHUB_STEP_SUMMARY
|
||||||
|
echo "### Health Checks" >> $GITHUB_STEP_SUMMARY
|
||||||
|
echo "| Service | Status | HTTP | Startup |" >> $GITHUB_STEP_SUMMARY
|
||||||
|
echo "|---------|--------|------|---------|" >> $GITHUB_STEP_SUMMARY
|
||||||
|
for entry in $HEALTH_RESULTS; do
|
||||||
|
svc=$(echo "$entry" | cut -d: -f1)
|
||||||
|
h_status=$(echo "$entry" | cut -d: -f2)
|
||||||
|
h_code=$(echo "$entry" | cut -d: -f3)
|
||||||
|
h_time=$(echo "$entry" | cut -d: -f4)
|
||||||
|
icon="✓"
|
||||||
|
[ "$h_status" != "ok" ] && icon="✗"
|
||||||
|
echo "| $svc | $icon $h_status | $h_code | ${h_time}s |" >> $GITHUB_STEP_SUMMARY
|
||||||
|
done
|
||||||
|
fi
|
||||||
|
|
|
||||||
487
docker/grafana/dashboards/deploy-tracking.json
Normal file
487
docker/grafana/dashboards/deploy-tracking.json
Normal file
|
|
@ -0,0 +1,487 @@
|
||||||
|
{
|
||||||
|
"annotations": { "list": [] },
|
||||||
|
"editable": true,
|
||||||
|
"fiscalYearStartMonth": 0,
|
||||||
|
"graphTooltip": 1,
|
||||||
|
"id": null,
|
||||||
|
"links": [],
|
||||||
|
"panels": [
|
||||||
|
{
|
||||||
|
"collapsed": false,
|
||||||
|
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 },
|
||||||
|
"id": 100,
|
||||||
|
"panels": [],
|
||||||
|
"title": "Status Overview",
|
||||||
|
"type": "row"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"mappings": [],
|
||||||
|
"thresholds": {
|
||||||
|
"mode": "absolute",
|
||||||
|
"steps": [
|
||||||
|
{ "color": "green", "value": null },
|
||||||
|
{ "color": "yellow", "value": 3600 },
|
||||||
|
{ "color": "red", "value": 86400 }
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"unit": "s"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"gridPos": { "h": 4, "w": 4, "x": 0, "y": 1 },
|
||||||
|
"id": 1,
|
||||||
|
"options": {
|
||||||
|
"colorMode": "value",
|
||||||
|
"graphMode": "none",
|
||||||
|
"reduceOptions": { "calcs": ["lastNotNull"] }
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "time() - deploy_last_timestamp_seconds{branch=\"main\"}",
|
||||||
|
"legendFormat": ""
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Last Deploy",
|
||||||
|
"type": "stat"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"mappings": [
|
||||||
|
{
|
||||||
|
"options": {
|
||||||
|
"0": { "color": "red", "text": "FAILED" },
|
||||||
|
"1": { "color": "green", "text": "OK" }
|
||||||
|
},
|
||||||
|
"type": "value"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"thresholds": {
|
||||||
|
"mode": "absolute",
|
||||||
|
"steps": [
|
||||||
|
{ "color": "red", "value": null },
|
||||||
|
{ "color": "green", "value": 1 }
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"gridPos": { "h": 4, "w": 4, "x": 4, "y": 1 },
|
||||||
|
"id": 2,
|
||||||
|
"options": {
|
||||||
|
"colorMode": "background",
|
||||||
|
"graphMode": "none",
|
||||||
|
"reduceOptions": { "calcs": ["lastNotNull"] }
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "deploy_last_status{branch=\"main\"}",
|
||||||
|
"legendFormat": ""
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Status",
|
||||||
|
"type": "stat"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"mappings": [],
|
||||||
|
"thresholds": {
|
||||||
|
"mode": "absolute",
|
||||||
|
"steps": [
|
||||||
|
{ "color": "green", "value": null },
|
||||||
|
{ "color": "yellow", "value": 300 },
|
||||||
|
{ "color": "red", "value": 600 }
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"unit": "s"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"gridPos": { "h": 4, "w": 4, "x": 8, "y": 1 },
|
||||||
|
"id": 3,
|
||||||
|
"options": {
|
||||||
|
"colorMode": "value",
|
||||||
|
"graphMode": "none",
|
||||||
|
"reduceOptions": { "calcs": ["lastNotNull"] }
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "deploy_last_duration_seconds{branch=\"main\"}",
|
||||||
|
"legendFormat": ""
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Duration",
|
||||||
|
"type": "stat"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": { "type": "postgres", "uid": "deploy-tracking" },
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"thresholds": {
|
||||||
|
"mode": "absolute",
|
||||||
|
"steps": [{ "color": "blue", "value": null }]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"gridPos": { "h": 4, "w": 4, "x": 12, "y": 1 },
|
||||||
|
"id": 4,
|
||||||
|
"options": {
|
||||||
|
"colorMode": "value",
|
||||||
|
"graphMode": "none",
|
||||||
|
"reduceOptions": { "calcs": ["lastNotNull"] }
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": { "type": "postgres", "uid": "deploy-tracking" },
|
||||||
|
"format": "table",
|
||||||
|
"rawQuery": true,
|
||||||
|
"rawSql": "SELECT COUNT(*) AS \"Deploys\" FROM deploy_tracking.deployments WHERE started_at > NOW() - INTERVAL '30 days';"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Deploys (30d)",
|
||||||
|
"type": "stat"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": { "type": "postgres", "uid": "deploy-tracking" },
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"unit": "percentunit",
|
||||||
|
"thresholds": {
|
||||||
|
"mode": "absolute",
|
||||||
|
"steps": [
|
||||||
|
{ "color": "red", "value": null },
|
||||||
|
{ "color": "yellow", "value": 0.8 },
|
||||||
|
{ "color": "green", "value": 0.95 }
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"gridPos": { "h": 4, "w": 4, "x": 16, "y": 1 },
|
||||||
|
"id": 5,
|
||||||
|
"options": {
|
||||||
|
"colorMode": "value",
|
||||||
|
"graphMode": "none",
|
||||||
|
"reduceOptions": { "calcs": ["lastNotNull"] }
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": { "type": "postgres", "uid": "deploy-tracking" },
|
||||||
|
"format": "table",
|
||||||
|
"rawQuery": true,
|
||||||
|
"rawSql": "SELECT CASE WHEN COUNT(*) = 0 THEN 0 ELSE COUNT(*) FILTER (WHERE status = 'success')::float / COUNT(*)::float END AS \"Rate\" FROM deploy_tracking.deployments WHERE started_at > NOW() - INTERVAL '30 days';"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Success Rate (30d)",
|
||||||
|
"type": "stat"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": { "type": "postgres", "uid": "deploy-tracking" },
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"unit": "s",
|
||||||
|
"thresholds": {
|
||||||
|
"mode": "absolute",
|
||||||
|
"steps": [
|
||||||
|
{ "color": "green", "value": null },
|
||||||
|
{ "color": "yellow", "value": 300 },
|
||||||
|
{ "color": "red", "value": 600 }
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"gridPos": { "h": 4, "w": 4, "x": 20, "y": 1 },
|
||||||
|
"id": 6,
|
||||||
|
"options": {
|
||||||
|
"colorMode": "value",
|
||||||
|
"graphMode": "none",
|
||||||
|
"reduceOptions": { "calcs": ["lastNotNull"] }
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": { "type": "postgres", "uid": "deploy-tracking" },
|
||||||
|
"format": "table",
|
||||||
|
"rawQuery": true,
|
||||||
|
"rawSql": "SELECT COALESCE(AVG(duration_s), 0) AS \"Avg\" FROM deploy_tracking.deployments WHERE status = 'success' AND started_at > NOW() - INTERVAL '30 days';"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Avg Duration (30d)",
|
||||||
|
"type": "stat"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"collapsed": false,
|
||||||
|
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 5 },
|
||||||
|
"id": 101,
|
||||||
|
"panels": [],
|
||||||
|
"title": "Deploy Frequency",
|
||||||
|
"type": "row"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": { "type": "postgres", "uid": "deploy-tracking" },
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": { "mode": "palette-classic" },
|
||||||
|
"custom": {
|
||||||
|
"drawStyle": "bars",
|
||||||
|
"fillOpacity": 80,
|
||||||
|
"stacking": { "mode": "normal" }
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"overrides": [
|
||||||
|
{
|
||||||
|
"matcher": { "id": "byName", "options": "success" },
|
||||||
|
"properties": [{ "id": "color", "value": { "fixedColor": "green", "mode": "fixed" } }]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"matcher": { "id": "byName", "options": "failure" },
|
||||||
|
"properties": [{ "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } }]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 6 },
|
||||||
|
"id": 7,
|
||||||
|
"options": { "legend": { "displayMode": "list" }, "tooltip": { "mode": "multi" } },
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": { "type": "postgres", "uid": "deploy-tracking" },
|
||||||
|
"format": "time_series",
|
||||||
|
"rawQuery": true,
|
||||||
|
"rawSql": "SELECT date_trunc('day', started_at) AS time, status AS metric, COUNT(*) AS value FROM deploy_tracking.deployments WHERE $__timeFilter(started_at) GROUP BY 1, 2 ORDER BY 1;"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Deploys per Day",
|
||||||
|
"type": "timeseries"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": { "type": "postgres", "uid": "deploy-tracking" },
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": { "mode": "palette-classic" },
|
||||||
|
"thresholds": {
|
||||||
|
"mode": "absolute",
|
||||||
|
"steps": [{ "color": "blue", "value": null }]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 6 },
|
||||||
|
"id": 8,
|
||||||
|
"options": { "orientation": "horizontal" },
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": { "type": "postgres", "uid": "deploy-tracking" },
|
||||||
|
"format": "table",
|
||||||
|
"rawQuery": true,
|
||||||
|
"rawSql": "SELECT s AS \"Service\", COUNT(*) AS \"Deploys\" FROM deploy_tracking.deployments, unnest(services) AS s WHERE started_at > NOW() - INTERVAL '30 days' GROUP BY s ORDER BY COUNT(*) DESC;"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Deploys per Service (30d)",
|
||||||
|
"type": "barchart"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"collapsed": false,
|
||||||
|
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 14 },
|
||||||
|
"id": 102,
|
||||||
|
"panels": [],
|
||||||
|
"title": "Build Performance",
|
||||||
|
"type": "row"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": { "type": "postgres", "uid": "deploy-tracking" },
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": { "mode": "palette-classic" },
|
||||||
|
"unit": "s",
|
||||||
|
"custom": { "drawStyle": "line", "pointSize": 5, "showPoints": "auto" }
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 15 },
|
||||||
|
"id": 9,
|
||||||
|
"options": { "legend": { "displayMode": "list" }, "tooltip": { "mode": "multi" } },
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": { "type": "postgres", "uid": "deploy-tracking" },
|
||||||
|
"format": "time_series",
|
||||||
|
"rawQuery": true,
|
||||||
|
"rawSql": "SELECT d.started_at AS time, ds.service_name AS metric, ds.build_duration_s AS value FROM deploy_tracking.deploy_services ds JOIN deploy_tracking.deployments d ON d.id = ds.deployment_id WHERE $__timeFilter(d.started_at) ORDER BY d.started_at;"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Build Duration Trend",
|
||||||
|
"type": "timeseries"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": { "mode": "palette-classic" },
|
||||||
|
"unit": "decmbytes",
|
||||||
|
"thresholds": {
|
||||||
|
"mode": "absolute",
|
||||||
|
"steps": [
|
||||||
|
{ "color": "green", "value": null },
|
||||||
|
{ "color": "yellow", "value": 500 },
|
||||||
|
{ "color": "red", "value": 1000 }
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 15 },
|
||||||
|
"id": 10,
|
||||||
|
"options": { "orientation": "horizontal" },
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "deploy_service_image_size_mb",
|
||||||
|
"legendFormat": "{{service}}"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Image Sizes",
|
||||||
|
"type": "barchart"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"collapsed": false,
|
||||||
|
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 23 },
|
||||||
|
"id": 103,
|
||||||
|
"panels": [],
|
||||||
|
"title": "Startup & Health",
|
||||||
|
"type": "row"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"mappings": [
|
||||||
|
{
|
||||||
|
"options": {
|
||||||
|
"0": { "color": "red", "text": "UNHEALTHY" },
|
||||||
|
"1": { "color": "green", "text": "HEALTHY" }
|
||||||
|
},
|
||||||
|
"type": "value"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"thresholds": {
|
||||||
|
"mode": "absolute",
|
||||||
|
"steps": [
|
||||||
|
{ "color": "red", "value": null },
|
||||||
|
{ "color": "green", "value": 1 }
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"gridPos": { "h": 4, "w": 12, "x": 0, "y": 24 },
|
||||||
|
"id": 11,
|
||||||
|
"options": {
|
||||||
|
"colorMode": "background",
|
||||||
|
"graphMode": "none",
|
||||||
|
"orientation": "horizontal",
|
||||||
|
"reduceOptions": { "calcs": ["lastNotNull"] }
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "deploy_service_healthy",
|
||||||
|
"legendFormat": "{{service}}"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Service Health",
|
||||||
|
"type": "stat"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": { "type": "postgres", "uid": "deploy-tracking" },
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": { "mode": "palette-classic" },
|
||||||
|
"unit": "s",
|
||||||
|
"custom": { "drawStyle": "line", "pointSize": 5, "showPoints": "auto" }
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 24 },
|
||||||
|
"id": 12,
|
||||||
|
"options": { "legend": { "displayMode": "list" }, "tooltip": { "mode": "multi" } },
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": { "type": "postgres", "uid": "deploy-tracking" },
|
||||||
|
"format": "time_series",
|
||||||
|
"rawQuery": true,
|
||||||
|
"rawSql": "SELECT d.started_at AS time, ds.service_name AS metric, ds.startup_time_s AS value FROM deploy_tracking.deploy_services ds JOIN deploy_tracking.deployments d ON d.id = ds.deployment_id WHERE ds.startup_time_s IS NOT NULL AND $__timeFilter(d.started_at) ORDER BY d.started_at;"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Startup Time Trend",
|
||||||
|
"type": "timeseries"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"collapsed": false,
|
||||||
|
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 32 },
|
||||||
|
"id": 104,
|
||||||
|
"panels": [],
|
||||||
|
"title": "Deploy History",
|
||||||
|
"type": "row"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": { "type": "postgres", "uid": "deploy-tracking" },
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {},
|
||||||
|
"overrides": [
|
||||||
|
{
|
||||||
|
"matcher": { "id": "byName", "options": "status" },
|
||||||
|
"properties": [
|
||||||
|
{
|
||||||
|
"id": "mappings",
|
||||||
|
"value": [
|
||||||
|
{
|
||||||
|
"options": {
|
||||||
|
"failure": { "color": "red", "text": "FAILED" },
|
||||||
|
"success": { "color": "green", "text": "OK" },
|
||||||
|
"running": { "color": "yellow", "text": "RUNNING" }
|
||||||
|
},
|
||||||
|
"type": "value"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"matcher": { "id": "byName", "options": "duration_s" },
|
||||||
|
"properties": [{ "id": "unit", "value": "s" }]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"gridPos": { "h": 10, "w": 24, "x": 0, "y": 33 },
|
||||||
|
"id": 13,
|
||||||
|
"options": { "showHeader": true, "sortBy": [{ "desc": true, "displayName": "started_at" }] },
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": { "type": "postgres", "uid": "deploy-tracking" },
|
||||||
|
"format": "table",
|
||||||
|
"rawQuery": true,
|
||||||
|
"rawSql": "SELECT started_at, commit_sha, commit_message, deployer, array_to_string(services, ', ') AS services, status, duration_s FROM deploy_tracking.deployments ORDER BY started_at DESC LIMIT 50;"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Recent Deploys",
|
||||||
|
"type": "table"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"schemaVersion": 39,
|
||||||
|
"tags": ["deploy", "ci-cd"],
|
||||||
|
"templating": {
|
||||||
|
"list": [
|
||||||
|
{
|
||||||
|
"current": { "selected": false, "text": "Prometheus", "value": "Prometheus" },
|
||||||
|
"hide": 0,
|
||||||
|
"includeAll": false,
|
||||||
|
"label": "Datasource",
|
||||||
|
"multi": false,
|
||||||
|
"name": "datasource",
|
||||||
|
"options": [],
|
||||||
|
"query": "prometheus",
|
||||||
|
"refresh": 1,
|
||||||
|
"type": "datasource"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"time": { "from": "now-30d", "to": "now" },
|
||||||
|
"timepicker": {},
|
||||||
|
"timezone": "browser",
|
||||||
|
"title": "Deploy Tracking",
|
||||||
|
"uid": "deploy-tracking",
|
||||||
|
"version": 1
|
||||||
|
}
|
||||||
23
docker/grafana/provisioning/datasources/deploy-tracking.yml
Normal file
23
docker/grafana/provisioning/datasources/deploy-tracking.yml
Normal file
|
|
@ -0,0 +1,23 @@
|
||||||
|
# Deploy Tracking PostgreSQL Datasource
|
||||||
|
# Queries deploy_tracking schema in the mana database
|
||||||
|
|
||||||
|
apiVersion: 1
|
||||||
|
|
||||||
|
datasources:
|
||||||
|
- name: DeployTracking
|
||||||
|
type: postgres
|
||||||
|
access: proxy
|
||||||
|
url: postgres:5432
|
||||||
|
user: postgres
|
||||||
|
secureJsonData:
|
||||||
|
password: ${POSTGRES_PASSWORD}
|
||||||
|
jsonData:
|
||||||
|
database: mana
|
||||||
|
sslmode: disable
|
||||||
|
maxOpenConns: 5
|
||||||
|
maxIdleConns: 2
|
||||||
|
connMaxLifetime: 14400
|
||||||
|
postgresVersion: 1600
|
||||||
|
timescaledb: false
|
||||||
|
isDefault: false
|
||||||
|
editable: true
|
||||||
|
|
@ -111,3 +111,11 @@ scrape_configs:
|
||||||
# - targets: ['nutriphi-backend:3037']
|
# - targets: ['nutriphi-backend:3037']
|
||||||
# metrics_path: '/metrics'
|
# metrics_path: '/metrics'
|
||||||
# scrape_interval: 30s
|
# scrape_interval: 30s
|
||||||
|
|
||||||
|
# ============================================
|
||||||
|
# Pushgateway (deploy metrics, batch jobs)
|
||||||
|
# ============================================
|
||||||
|
- job_name: 'pushgateway'
|
||||||
|
honor_labels: true
|
||||||
|
static_configs:
|
||||||
|
- targets: ['pushgateway:9091']
|
||||||
|
|
|
||||||
148
scripts/deploy-metrics.sh
Executable file
148
scripts/deploy-metrics.sh
Executable file
|
|
@ -0,0 +1,148 @@
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
# Deploy Metrics Library
|
||||||
|
# Source this file in CI/CD: source scripts/deploy-metrics.sh
|
||||||
|
#
|
||||||
|
# Provides functions for timing, DB inserts, and Pushgateway pushes.
|
||||||
|
|
||||||
|
# Strict mode. This file is meant to be sourced, so these options also take
# effect in the shell that sources it.
set -euo pipefail

# Epoch seconds written by deploy_timer_start. A value that is already present
# in the environment is kept, so sourcing this file again (for example in a
# later CI step) does not reset a start time the caller restored beforehand.
DEPLOY_START_EPOCH="${DEPLOY_START_EPOCH:-}"

# Base URL of the Prometheus Pushgateway; override via the environment.
PUSHGATEWAY_URL="${PUSHGATEWAY_URL:-http://localhost:9091}"

# Command line used to run psql. It is expanded unquoted by the helpers below
# so that it word-splits into a command plus arguments; override via the
# environment to target a different container or database.
PSQL_CMD="${PSQL_CMD:-docker exec -i mana-infra-postgres psql -U postgres -d mana -tAq}"
|
||||||
|
|
||||||
|
# ── Timing ──────────────────────────────────────────────────
|
||||||
|
|
||||||
|
# Store the current Unix time (whole seconds) in the global DEPLOY_START_EPOCH.
deploy_timer_start() {
  DEPLOY_START_EPOCH="$(date '+%s')"
}
|
||||||
|
|
||||||
|
# Print the whole seconds elapsed since DEPLOY_START_EPOCH.
# Globals: DEPLOY_START_EPOCH (read)
# Outputs: elapsed seconds on stdout. If DEPLOY_START_EPOCH is unset, empty or
#          not a number, prints 0 and a warning on stderr instead of treating
#          the start as epoch 0 (which would report the current Unix time as
#          the duration).
# Returns: always 0.
deploy_timer_elapsed() {
  local start="${DEPLOY_START_EPOCH:-}"
  local now

  if [[ ! "$start" =~ ^[0-9]+$ ]]; then
    printf 'deploy_timer_elapsed: DEPLOY_START_EPOCH is not set; reporting 0\n' >&2
    printf '0\n'
    return 0
  fi

  now=$(date +%s)
  # 10# forces base 10 so a value with a leading zero is not read as octal.
  printf '%s\n' "$(( now - 10#$start ))"
}
|
||||||
|
|
||||||
|
# ── Docker helpers ──────────────────────────────────────────
|
||||||
|
|
||||||
|
# Print the image size in MiB (two decimals, truncated) for a compose service.
# Usage:   get_image_size_mb <compose-service-name>
# Globals: COMPOSE_FILE, ENV_FILE (read; expected to be set by the caller)
# Outputs: e.g. "150.50"; prints "0" when the image or its size cannot be
#          determined.
# Returns: always 0.
get_image_size_mb() {
  local service="$1"
  local image_ids image_id size_bytes

  image_ids=$(docker compose -f "$COMPOSE_FILE" --env-file "$ENV_FILE" images "$service" -q 2>/dev/null) || image_ids=""
  # The listing may be empty or contain several IDs (one per line); only the
  # first one is inspected, because passing them all as a single argument
  # makes `docker image inspect` fail.
  image_id="${image_ids%%$'\n'*}"
  if [[ -z "$image_id" ]]; then
    echo "0"
    return 0
  fi

  size_bytes=$(docker image inspect "$image_id" --format='{{.Size}}' 2>/dev/null) || size_bytes=""
  if [[ ! "$size_bytes" =~ ^[0-9]+$ ]]; then
    echo "0"
    return 0
  fi

  # Integer arithmetic avoids depending on bc and always yields a well-formed
  # number (with a leading zero for sizes below 1 MiB).
  printf '%d.%02d\n' \
    "$(( size_bytes / 1048576 ))" \
    "$(( size_bytes % 1048576 * 100 / 1048576 ))"
}
|
||||||
|
|
||||||
|
# Poll a health URL until it answers HTTP 200 or the timeout is reached.
# Usage:   check_health_timed <service-name> <url> [timeout_s=30] [interval_s=2]
# Outputs: "<status> <seconds>.0 <http_code>" on stdout, e.g. "ok 4.0 200" or
#          "failed 30.0 000". Seconds are whole seconds; the http_code is
#          "000" when no HTTP response was received.
# Returns: 0 when the URL answered 200, 1 on timeout.
check_health_timed() {
  local service="$1" # not used by the check itself; kept for the call signature
  local url="$2"
  local timeout="${3:-30}"
  local interval="${4:-2}"
  local start http_code elapsed

  start=$(date +%s)
  while true; do
    # On a connection failure curl still prints "000" via -w and exits
    # non-zero, so appending a fallback "000" here would yield "000000".
    http_code=$(curl -s -o /dev/null -w "%{http_code}" --max-time 3 "$url" 2>/dev/null) || true
    [[ "$http_code" =~ ^[0-9]{3}$ ]] || http_code="000"
    elapsed=$(( $(date +%s) - start ))

    if [[ "$http_code" == "200" ]]; then
      echo "ok ${elapsed}.0 $http_code"
      return 0
    fi

    if [[ "$elapsed" -ge "$timeout" ]]; then
      echo "failed ${elapsed}.0 $http_code"
      return 1
    fi

    sleep "$interval"
  done
}
|
||||||
|
|
||||||
|
# ── Database inserts ────────────────────────────────────────
|
||||||
|
|
||||||
|
# Create the deploy_tracking schema if it does not exist yet (idempotent).
# Only the schema itself is created here, not the tables inside it.
# Globals: PSQL_CMD (read)
# Outputs: a warning on stderr when the statement could not be executed.
# Returns: always 0 — deploy tracking is best-effort and must not abort a
#          deploy.
ensure_deploy_schema() {
  # PSQL_CMD holds a command plus arguments and is expanded unquoted on purpose.
  # shellcheck disable=SC2086
  if ! $PSQL_CMD -c "CREATE SCHEMA IF NOT EXISTS deploy_tracking;" 2>/dev/null; then
    printf 'warning: could not ensure the deploy_tracking schema exists; deploy metrics may not be recorded\n' >&2
  fi
  return 0
}
|
||||||
|
|
||||||
|
# Insert a row into deploy_tracking.deployments and print the new id.
# Usage:   insert_deployment <run_id> <run_attempt> <commit_sha> <commit_message> <branch> <trigger> <deployer> <services_csv> <status>
# Globals: PSQL_CMD (read)
# Outputs: whatever psql prints for "RETURNING id" (the new row id).
#
# All values are handed to psql as variables (-v) and referenced with the
# :'name' syntax, so psql quotes them as SQL literals. Nothing from the
# arguments is spliced into the statement text, which keeps quotes in commit
# messages, branch names etc. from breaking or altering the query.
# <services_csv> is split on "," into a text array; an empty string becomes an
# empty array.
insert_deployment() {
  local run_id="$1" run_attempt="$2" commit_sha="$3" commit_message="$4"
  local branch="$5" trigger="$6" deployer="$7" services_csv="$8" status="$9"

  # PSQL_CMD holds a command plus arguments and is expanded unquoted on purpose.
  # The heredoc delimiter is quoted so the shell leaves the SQL untouched.
  # shellcheck disable=SC2086
  $PSQL_CMD \
    -v "run_id=${run_id}" \
    -v "run_attempt=${run_attempt}" \
    -v "commit_sha=${commit_sha}" \
    -v "commit_message=${commit_message}" \
    -v "branch=${branch}" \
    -v "trigger=${trigger}" \
    -v "deployer=${deployer}" \
    -v "services_csv=${services_csv}" \
    -v "status=${status}" <<'SQL'
INSERT INTO deploy_tracking.deployments
  (run_id, run_attempt, commit_sha, commit_message, branch, trigger, deployer, services, status)
VALUES
  (CAST(:'run_id' AS bigint), CAST(:'run_attempt' AS integer), :'commit_sha', :'commit_message',
   :'branch', :'trigger', :'deployer',
   COALESCE(string_to_array(NULLIF(:'services_csv', ''), ','), CAST('{}' AS text[])),
   :'status')
RETURNING id;
SQL
}
|
||||||
|
|
||||||
|
# Mark a deployment row as finished: set its status, finished_at = NOW() and
# duration_s.
# Usage:   finalise_deployment <id> <status> <duration_s>
# Globals: PSQL_CMD (read)
#
# Values are passed as psql variables and referenced with :'name', so psql
# quotes them as literals instead of the shell splicing them into the SQL.
finalise_deployment() {
  local id="$1" status="$2" duration_s="$3"

  # PSQL_CMD holds a command plus arguments and is expanded unquoted on purpose.
  # shellcheck disable=SC2086
  $PSQL_CMD \
    -v "dep_id=${id}" \
    -v "status=${status}" \
    -v "duration_s=${duration_s}" <<'SQL'
UPDATE deploy_tracking.deployments
SET status = :'status', finished_at = NOW(), duration_s = CAST(:'duration_s' AS numeric)
WHERE id = CAST(:'dep_id' AS bigint);
SQL
}
|
||||||
|
|
||||||
|
# Insert one row into deploy_tracking.deploy_services.
# Usage:   insert_deploy_service <deployment_id> <service_name> <build_duration_s> <image_size_mb> <startup_time_s> <health_status> <health_http_code>
# Globals: PSQL_CMD (read)
#
# Values are passed as psql variables and referenced with :'name', so psql
# quotes them as literals instead of the shell splicing them into the SQL.
# An empty string for any of the numeric measurements is stored as NULL rather
# than producing a statement that fails to parse.
insert_deploy_service() {
  local dep_id="$1" svc="$2" build_dur="$3" img_mb="$4" startup="$5" health="$6" http_code="$7"

  # PSQL_CMD holds a command plus arguments and is expanded unquoted on purpose.
  # shellcheck disable=SC2086
  $PSQL_CMD \
    -v "dep_id=${dep_id}" \
    -v "svc=${svc}" \
    -v "build_dur=${build_dur}" \
    -v "img_mb=${img_mb}" \
    -v "startup=${startup}" \
    -v "health=${health}" \
    -v "http_code=${http_code}" <<'SQL'
INSERT INTO deploy_tracking.deploy_services
  (deployment_id, service_name, build_duration_s, image_size_mb, startup_time_s, health_status, health_http_code)
VALUES
  (CAST(:'dep_id' AS bigint), :'svc',
   CAST(NULLIF(:'build_dur', '') AS numeric),
   CAST(NULLIF(:'img_mb', '') AS numeric),
   CAST(NULLIF(:'startup', '') AS numeric),
   :'health',
   CAST(NULLIF(:'http_code', '') AS integer));
SQL
}
|
||||||
|
|
||||||
|
# ── Pushgateway ─────────────────────────────────────────────
|
||||||
|
|
||||||
|
# Push the overall deploy gauges to the Pushgateway (best-effort).
# Usage:   push_deploy_metrics <status> <duration_s> <branch>
# Globals: PUSHGATEWAY_URL (read)
# Metrics: deploy_last_timestamp_seconds (now), deploy_last_duration_seconds,
#          deploy_last_status (1 when <status> is "success", otherwise 0),
#          grouped under job "deploy" and the branch label.
# Returns: 0 even when the push fails, so a metrics problem cannot fail a
#          deploy.
push_deploy_metrics() {
  local status="$1" duration_s="$2" branch="$3"
  local status_val=0
  local safe_label='^[A-Za-z0-9._-]+$'
  local branch_segment encoded

  if [[ "$status" == "success" ]]; then
    status_val=1
  fi

  # The branch becomes part of the URL path. Values such as "feature/x" would
  # add extra path components, so anything outside a conservative character
  # set is sent using the Pushgateway's "@base64" label form (URL-safe
  # alphabet, padding optional; a lone "=" stands for an empty value).
  if [[ "$branch" =~ $safe_label ]]; then
    branch_segment="branch/${branch}"
  else
    encoded=$(printf '%s' "$branch" | base64 | tr -d '\n=' | tr '+/' '-_')
    branch_segment="branch@base64/${encoded:-=}"
  fi

  curl -s --data-binary @- "${PUSHGATEWAY_URL}/metrics/job/deploy/${branch_segment}" <<PROM || true
# TYPE deploy_last_timestamp_seconds gauge
deploy_last_timestamp_seconds $(date +%s)
# TYPE deploy_last_duration_seconds gauge
deploy_last_duration_seconds $duration_s
# TYPE deploy_last_status gauge
deploy_last_status $status_val
PROM
}
|
||||||
|
|
||||||
|
# Push the per-service gauges to the Pushgateway (best-effort).
# Usage:   push_service_metrics <service> <build_duration_s> <image_size_mb> <healthy>
# Globals: PUSHGATEWAY_URL (read)
# Metrics: deploy_service_build_duration_seconds, deploy_service_image_size_mb,
#          deploy_service_healthy (1 when <healthy> is "ok", otherwise 0),
#          grouped under job "deploy_service" and the service label.
# Returns: 0 even when the push fails, so a metrics problem cannot fail a
#          deploy.
push_service_metrics() {
  local svc="$1" build_dur="$2" img_mb="$3" healthy="$4"
  local healthy_val=0
  local safe_label='^[A-Za-z0-9._-]+$'
  local svc_segment encoded

  if [[ "$healthy" == "ok" ]]; then
    healthy_val=1
  fi

  # The service name becomes part of the URL path. Anything outside a
  # conservative character set (for instance a name containing "/") is sent
  # using the Pushgateway's "@base64" label form (URL-safe alphabet, padding
  # optional; a lone "=" stands for an empty value).
  if [[ "$svc" =~ $safe_label ]]; then
    svc_segment="service/${svc}"
  else
    encoded=$(printf '%s' "$svc" | base64 | tr -d '\n=' | tr '+/' '-_')
    svc_segment="service@base64/${encoded:-=}"
  fi

  curl -s --data-binary @- "${PUSHGATEWAY_URL}/metrics/job/deploy_service/${svc_segment}" <<PROM || true
# TYPE deploy_service_build_duration_seconds gauge
deploy_service_build_duration_seconds $build_dur
# TYPE deploy_service_image_size_mb gauge
deploy_service_image_size_mb $img_mb
# TYPE deploy_service_healthy gauge
deploy_service_healthy $healthy_val
PROM
}
|
||||||
40
scripts/mac-mini/init-deploy-tracking.sql
Normal file
40
scripts/mac-mini/init-deploy-tracking.sql
Normal file
|
|
@ -0,0 +1,40 @@
|
||||||
|
-- Deploy Tracking Schema
|
||||||
|
-- Run once: docker exec -i mana-infra-postgres psql -U postgres -d mana < scripts/mac-mini/init-deploy-tracking.sql
|
||||||
|
-- All statements are idempotent (IF NOT EXISTS).
|
||||||
|
|
||||||
|
CREATE SCHEMA IF NOT EXISTS deploy_tracking;
|
||||||
|
|
||||||
|
-- One row per CI/CD run
|
||||||
|
CREATE TABLE IF NOT EXISTS deploy_tracking.deployments (
|
||||||
|
id BIGSERIAL PRIMARY KEY,
|
||||||
|
run_id BIGINT NOT NULL,
|
||||||
|
run_attempt INTEGER NOT NULL DEFAULT 1,
|
||||||
|
commit_sha VARCHAR(40) NOT NULL,
|
||||||
|
commit_message TEXT,
|
||||||
|
branch VARCHAR(255) NOT NULL DEFAULT 'main',
|
||||||
|
trigger VARCHAR(20) NOT NULL,
|
||||||
|
deployer VARCHAR(255),
|
||||||
|
services TEXT[],
|
||||||
|
status VARCHAR(20) NOT NULL DEFAULT 'running',
|
||||||
|
started_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
||||||
|
finished_at TIMESTAMPTZ,
|
||||||
|
duration_s NUMERIC(10,2)
|
||||||
|
);
|
||||||
|
|
||||||
|
-- One row per service per deploy
|
||||||
|
CREATE TABLE IF NOT EXISTS deploy_tracking.deploy_services (
|
||||||
|
id BIGSERIAL PRIMARY KEY,
|
||||||
|
deployment_id BIGINT NOT NULL REFERENCES deploy_tracking.deployments(id) ON DELETE CASCADE,
|
||||||
|
service_name VARCHAR(100) NOT NULL,
|
||||||
|
build_duration_s NUMERIC(10,2),
|
||||||
|
image_size_mb NUMERIC(10,2),
|
||||||
|
startup_time_s NUMERIC(10,2),
|
||||||
|
health_status VARCHAR(10),
|
||||||
|
health_http_code INTEGER
|
||||||
|
);
|
||||||
|
|
||||||
|
-- Indexes
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_deployments_started_at ON deploy_tracking.deployments (started_at DESC);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_deployments_status ON deploy_tracking.deployments (status);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_deploy_services_deployment_id ON deploy_tracking.deploy_services (deployment_id);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_deploy_services_service_name ON deploy_tracking.deploy_services (service_name);
|
||||||
Loading…
Add table
Add a link
Reference in a new issue