diff --git a/services/mana-stt/grafana-dashboard.json b/services/mana-stt/grafana-dashboard.json new file mode 100644 index 000000000..398380ca7 --- /dev/null +++ b/services/mana-stt/grafana-dashboard.json @@ -0,0 +1,740 @@ +{ + "annotations": { + "list": [] + }, + "description": "ManaCore Speech-to-Text Service Monitoring", + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "links": [], + "panels": [ + { + "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 }, + "id": 100, + "panels": [], + "title": "Overview", + "type": "row" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "mappings": [ + { + "options": { + "0": { "color": "red", "index": 1, "text": "DOWN" }, + "1": { "color": "green", "index": 0, "text": "UP" } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "red", "value": null }, + { "color": "green", "value": 1 } + ] + } + }, + "overrides": [] + }, + "gridPos": { "h": 4, "w": 3, "x": 0, "y": 1 }, + "id": 1, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "auto" + }, + "pluginVersion": "10.4.1", + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "up{job=\"mana-stt\"}", + "refId": "A" + } + ], + "title": "Service Status", + "type": "stat" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "mappings": [ + { + "options": { + "0": { "color": "yellow", "index": 0, "text": "Not Loaded" }, + "1": { "color": "green", "index": 1, "text": "Loaded" } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "yellow", "value": null }, + { "color": "green", "value": 1 } + ] + } + }, + "overrides": [] + }, + "gridPos": { "h": 4, "w": 3, "x": 3, "y": 1 }, + "id": 2, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "auto" + }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "mana_stt_model_loaded{model=\"whisper\"}", + "refId": "A" + } + ], + "title": "Whisper Model", + "type": "stat" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "mappings": [ + { + "options": { + "0": { "color": "yellow", "index": 0, "text": "Not Loaded" }, + "1": { "color": "green", "index": 1, "text": "Loaded" } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "yellow", "value": null }, + { "color": "green", "value": 1 } + ] + } + }, + "overrides": [] + }, + "gridPos": { "h": 4, "w": 3, "x": 6, "y": 1 }, + "id": 3, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "auto" + }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "mana_stt_model_loaded{model=\"voxtral\"}", + "refId": "A" + } + ], + "title": "Voxtral Model", + "type": "stat" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "mappings": [], + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { "h": 4, "w": 3, "x": 9, "y": 1 }, + "id": 4, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "auto" + }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "sum(mana_stt_requests_total{status=\"success\"})", + "refId": "A" + } + ], + "title": "Total Transcriptions", + "type": "stat" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "mappings": [], + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { "h": 4, "w": 3, "x": 12, "y": 1 }, + "id": 5, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "auto" + }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "sum(mana_stt_characters_transcribed_total)", + "refId": "A" + } + ], + "title": "Characters Transcribed", + "type": "stat" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 1 }, + { "color": "red", "value": 3 } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { "h": 4, "w": 3, "x": 15, "y": 1 }, + "id": 6, + "options": { + "colorMode": "background", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "auto" + }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "sum(mana_stt_active_requests)", + "refId": "A" + } + ], + "title": "Active Requests", + "type": "stat" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "red", "value": 1 } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { "h": 4, "w": 3, "x": 18, "y": 1 }, + "id": 7, + "options": { + "colorMode": "background", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "auto" + }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "sum(mana_stt_requests_total{status=\"error\"})", + "refId": "A" + } + ], + "title": "Total Errors", + "type": "stat" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "mappings": [], + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { "h": 4, "w": 3, "x": 21, "y": 1 }, + "id": 8, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "auto" + }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "histogram_quantile(0.50, sum(rate(mana_stt_transcription_duration_seconds_bucket[5m])) by (le))", + "refId": "A" + } + ], + "title": "Median Duration", + "type": "stat" + }, + { + "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 5 }, + "id": 101, + "panels": [], + "title": "Performance", + "type": "row" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { "legend": false, "tooltip": false, "viz": false }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { "type": "linear" }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { "group": "A", "mode": "none" }, + "thresholdsStyle": { "mode": "off" } + }, + "mappings": [], + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 6 }, + "id": 10, + "options": { + "legend": { + "calcs": ["mean", "max"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { "mode": "multi", "sort": "desc" } + }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "histogram_quantile(0.50, sum(rate(mana_stt_transcription_duration_seconds_bucket{model=\"whisper\"}[5m])) by (le))", + "legendFormat": "Whisper p50", + "refId": "A" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "histogram_quantile(0.95, sum(rate(mana_stt_transcription_duration_seconds_bucket{model=\"whisper\"}[5m])) by (le))", + "legendFormat": "Whisper p95", + "refId": "B" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "histogram_quantile(0.50, sum(rate(mana_stt_transcription_duration_seconds_bucket{model=\"voxtral\"}[5m])) by (le))", + "legendFormat": "Voxtral p50", + "refId": "C" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "histogram_quantile(0.95, sum(rate(mana_stt_transcription_duration_seconds_bucket{model=\"voxtral\"}[5m])) by (le))", + "legendFormat": "Voxtral p95", + "refId": "D" + } + ], + "title": "Transcription Duration (p50 / p95)", + "type": "timeseries" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { "legend": false, "tooltip": false, "viz": false }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { "type": "linear" }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { "group": "A", "mode": "none" }, + "thresholdsStyle": { "mode": "off" } + }, + "mappings": [], + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 6 }, + "id": 11, + "options": { + "legend": { + "calcs": ["mean", "sum"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { "mode": "multi", "sort": "desc" } + }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "sum(rate(mana_stt_requests_total{model=\"whisper\", status=\"success\"}[5m]))", + "legendFormat": "Whisper Success", + "refId": "A" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "sum(rate(mana_stt_requests_total{model=\"voxtral\", status=\"success\"}[5m]))", + "legendFormat": "Voxtral Success", + "refId": "B" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "sum(rate(mana_stt_requests_total{status=\"error\"}[5m]))", + "legendFormat": "Errors", + "refId": "C" + } + ], + "title": "Request Rate", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 14 }, + "id": 102, + "panels": [], + "title": "Details", + "type": "row" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { "hideFrom": { "legend": false, "tooltip": false, "viz": false } }, + "mappings": [] + }, + "overrides": [] + }, + "gridPos": { "h": 8, "w": 6, "x": 0, "y": 15 }, + "id": 12, + "options": { + "legend": { "displayMode": "list", "placement": "right", "showLegend": true }, + "pieType": "pie", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "tooltip": { "mode": "single", "sort": "none" } + }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "sum(mana_stt_requests_total{status=\"success\"}) by (model)", + "legendFormat": "{{model}}", + "refId": "A" + } + ], + "title": "Requests by Model", + "type": "piechart" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { "hideFrom": { "legend": false, "tooltip": false, "viz": false } }, + "mappings": [] + }, + "overrides": [] + }, + "gridPos": { "h": 8, "w": 6, "x": 6, "y": 15 }, + "id": 13, + "options": { + "legend": { "displayMode": "list", "placement": "right", "showLegend": true }, + "pieType": "pie", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "tooltip": { "mode": "single", "sort": "none" } + }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "sum(mana_stt_requests_total{status=\"success\"}) by (language)", + "legendFormat": "{{language}}", + "refId": "A" + } + ], + "title": "Requests by Language", + "type": "piechart" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "bars", + "fillOpacity": 80, + "gradientMode": "none", + "hideFrom": { "legend": false, "tooltip": false, "viz": false }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { "type": "linear" }, + "showPoints": "never", + "spanNulls": false, + "stacking": { "group": "A", "mode": "normal" }, + "thresholdsStyle": { "mode": "off" } + }, + "mappings": [], + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] }, + "unit": "decbytes" + }, + "overrides": [] + }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 15 }, + "id": 14, + "options": { + "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true }, + "tooltip": { "mode": "multi", "sort": "desc" } + }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "sum(rate(mana_stt_file_size_mb_sum{model=\"whisper\"}[5m])) * 1024 * 1024", + "legendFormat": "Whisper", + "refId": "A" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "sum(rate(mana_stt_file_size_mb_sum{model=\"voxtral\"}[5m])) * 1024 * 1024", + "legendFormat": "Voxtral", + "refId": "B" + } + ], + "title": "Data Processed", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 23 }, + "id": 103, + "panels": [], + "title": "Model Loading", + "type": "row" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 30 }, + { "color": "red", "value": 60 } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { "h": 6, "w": 8, "x": 0, "y": 24 }, + "id": 15, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "auto" + }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "mana_stt_model_load_duration_seconds_sum{model=\"whisper\"} / mana_stt_model_load_duration_seconds_count{model=\"whisper\"}", + "legendFormat": "Whisper", + "refId": "A" + } + ], + "title": "Whisper Load Time", + "type": "stat" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 60 }, + { "color": "red", "value": 120 } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { "h": 6, "w": 8, "x": 8, "y": 24 }, + "id": 16, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "auto" + }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "mana_stt_model_load_duration_seconds_sum{model=\"voxtral\"} / mana_stt_model_load_duration_seconds_count{model=\"voxtral\"}", + "legendFormat": "Voxtral", + "refId": "A" + } + ], + "title": "Voxtral Load Time", + "type": "stat" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { "legend": false, "tooltip": false, "viz": false }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { "type": "linear" }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { "group": "A", "mode": "none" }, + "thresholdsStyle": { "mode": "off" } + }, + "mappings": [], + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { "h": 6, "w": 8, "x": 16, "y": 24 }, + "id": 17, + "options": { + "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true }, + "tooltip": { "mode": "multi", "sort": "desc" } + }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "sum(rate(mana_stt_characters_transcribed_total{model=\"whisper\"}[5m]))", + "legendFormat": "Whisper", + "refId": "A" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "sum(rate(mana_stt_characters_transcribed_total{model=\"voxtral\"}[5m]))", + "legendFormat": "Voxtral", + "refId": "B" + } + ], + "title": "Characters/sec Transcribed", + "type": "timeseries" + } + ], + "refresh": "30s", + "schemaVersion": 39, + "tags": ["manacore", "stt", "ai"], + "templating": { "list": [] }, + "time": { "from": "now-1h", "to": "now" }, + "timepicker": {}, + "timezone": "browser", + "title": "ManaCore STT Service", + "uid": "mana-stt-dashboard", + "version": 1, + "weekStart": "monday" +} diff --git a/services/ollama-metrics-proxy/grafana-dashboard.json b/services/ollama-metrics-proxy/grafana-dashboard.json new file mode 100644 index 000000000..6fa5e171c --- /dev/null +++ b/services/ollama-metrics-proxy/grafana-dashboard.json @@ -0,0 +1,595 @@ +{ + "annotations": { "list": [] }, + "description": "Ollama LLM Monitoring - Generation metrics, token usage, model performance", + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "panels": [ + { + "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 }, + "id": 100, + "panels": [], + "title": "Overview", + "type": "row" + }, + { + "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "mappings": [ + { + "options": { + "0": { "color": "red", "text": "DOWN" }, + "1": { "color": "green", "text": "UP" } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "red", "value": null }, + { "color": "green", "value": 1 } + ] + } + } + }, + "gridPos": { "h": 4, "w": 3, "x": 0, "y": 1 }, + "id": 1, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "auto" + }, + "targets": [{ "expr": "up{job=\"ollama\"}", "refId": "A" }], + "title": "Proxy Status", + "type": "stat" + }, + { + "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "mappings": [], + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] }, + "unit": "short" + } + }, + "gridPos": { "h": 4, "w": 3, "x": 3, "y": 1 }, + "id": 2, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "auto" + }, + "targets": [{ "expr": "sum(ollama_requests_total{status=\"success\"})", "refId": "A" }], + "title": "Total Requests", + "type": "stat" + }, + { + "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "mappings": [], + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] }, + "unit": "short" + } + }, + "gridPos": { "h": 4, "w": 3, "x": 6, "y": 1 }, + "id": 3, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "auto" + }, + "targets": [{ "expr": "sum(ollama_tokens_generated_total)", "refId": "A" }], + "title": "Total Tokens Generated", + "type": "stat" + }, + { + "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "mappings": [], + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] }, + "unit": "short" + } + }, + "gridPos": { "h": 4, "w": 3, "x": 9, "y": 1 }, + "id": 4, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "auto" + }, + "targets": [{ "expr": "sum(ollama_prompt_tokens_total)", "refId": "A" }], + "title": "Total Prompt Tokens", + "type": "stat" + }, + { + "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 1 }, + { "color": "red", "value": 3 } + ] + }, + "unit": "short" + } + }, + "gridPos": { "h": 4, "w": 3, "x": 12, "y": 1 }, + "id": 5, + "options": { + "colorMode": "background", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "auto" + }, + "targets": [{ "expr": "sum(ollama_active_requests)", "refId": "A" }], + "title": "Active Requests", + "type": "stat" + }, + { + "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "red", "value": 1 } + ] + }, + "unit": "short" + } + }, + "gridPos": { "h": 4, "w": 3, "x": 15, "y": 1 }, + "id": 6, + "options": { + "colorMode": "background", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "auto" + }, + "targets": [{ "expr": "sum(ollama_requests_total{status=\"error\"})", "refId": "A" }], + "title": "Total Errors", + "type": "stat" + }, + { + "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "mappings": [], + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] }, + "unit": "s" + } + }, + "gridPos": { "h": 4, "w": 3, "x": 18, "y": 1 }, + "id": 7, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "auto" + }, + "targets": [ + { + "expr": "histogram_quantile(0.50, sum(rate(ollama_generation_duration_seconds_bucket[5m])) by (le))", + "refId": "A" + } + ], + "title": "Median Generation Time", + "type": "stat" + }, + { + "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "mappings": [], + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] }, + "unit": "s" + } + }, + "gridPos": { "h": 4, "w": 3, "x": 21, "y": 1 }, + "id": 8, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "auto" + }, + "targets": [ + { + "expr": "histogram_quantile(0.50, sum(rate(ollama_time_to_first_token_seconds_bucket[5m])) by (le))", + "refId": "A" + } + ], + "title": "Median TTFT", + "type": "stat" + }, + { + "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 5 }, + "id": 101, + "panels": [], + "title": "Performance", + "type": "row" + }, + { + "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { "legend": false, "tooltip": false, "viz": false }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { "type": "linear" }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { "group": "A", "mode": "none" }, + "thresholdsStyle": { "mode": "off" } + }, + "mappings": [], + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] }, + "unit": "s" + } + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 6 }, + "id": 10, + "options": { + "legend": { + "calcs": ["mean", "max"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { "mode": "multi", "sort": "desc" } + }, + "targets": [ + { + "expr": "histogram_quantile(0.50, sum(rate(ollama_generation_duration_seconds_bucket[5m])) by (le, model))", + "legendFormat": "{{model}} p50", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.95, sum(rate(ollama_generation_duration_seconds_bucket[5m])) by (le, model))", + "legendFormat": "{{model}} p95", + "refId": "B" + } + ], + "title": "Generation Duration by Model (p50 / p95)", + "type": "timeseries" + }, + { + "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { "legend": false, "tooltip": false, "viz": false }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { "type": "linear" }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { "group": "A", "mode": "none" }, + "thresholdsStyle": { "mode": "off" } + }, + "mappings": [], + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] }, + "unit": "s" + } + }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 6 }, + "id": 11, + "options": { + "legend": { + "calcs": ["mean", "max"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { "mode": "multi", "sort": "desc" } + }, + "targets": [ + { + "expr": "histogram_quantile(0.50, sum(rate(ollama_time_to_first_token_seconds_bucket[5m])) by (le, model))", + "legendFormat": "{{model}} p50", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.95, sum(rate(ollama_time_to_first_token_seconds_bucket[5m])) by (le, model))", + "legendFormat": "{{model}} p95", + "refId": "B" + } + ], + "title": "Time to First Token by Model (p50 / p95)", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 14 }, + "id": 102, + "panels": [], + "title": "Token Usage", + "type": "row" + }, + { + "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { "legend": false, "tooltip": false, "viz": false }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { "type": "linear" }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { "group": "A", "mode": "normal" }, + "thresholdsStyle": { "mode": "off" } + }, + "mappings": [], + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] }, + "unit": "short" + } + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 15 }, + "id": 12, + "options": { + "legend": { + "calcs": ["sum"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { "mode": "multi", "sort": "desc" } + }, + "targets": [ + { + "expr": "sum(rate(ollama_tokens_generated_total[5m])) by (model)", + "legendFormat": "{{model}}", + "refId": "A" + } + ], + "title": "Tokens Generated per Second by Model", + "type": "timeseries" + }, + { + "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { "legend": false, "tooltip": false, "viz": false }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { "type": "linear" }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { "group": "A", "mode": "normal" }, + "thresholdsStyle": { "mode": "off" } + }, + "mappings": [], + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] }, + "unit": "reqps" + } + }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 15 }, + "id": 13, + "options": { + "legend": { + "calcs": ["mean", "sum"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { "mode": "multi", "sort": "desc" } + }, + "targets": [ + { + "expr": "sum(rate(ollama_requests_total{status=\"success\"}[5m])) by (model)", + "legendFormat": "{{model}}", + "refId": "A" + } + ], + "title": "Requests per Second by Model", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 23 }, + "id": 103, + "panels": [], + "title": "Model Distribution", + "type": "row" + }, + { + "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { "hideFrom": { "legend": false, "tooltip": false, "viz": false } }, + "mappings": [] + } + }, + "gridPos": { "h": 8, "w": 8, "x": 0, "y": 24 }, + "id": 14, + "options": { + "legend": { "displayMode": "list", "placement": "right", "showLegend": true }, + "pieType": "pie", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "tooltip": { "mode": "single", "sort": "none" } + }, + "targets": [ + { + "expr": "sum(ollama_requests_total{status=\"success\"}) by (model)", + "legendFormat": "{{model}}", + "refId": "A" + } + ], + "title": "Requests by Model", + "type": "piechart" + }, + { + "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { "hideFrom": { "legend": false, "tooltip": false, "viz": false } }, + "mappings": [] + } + }, + "gridPos": { "h": 8, "w": 8, "x": 8, "y": 24 }, + "id": 15, + "options": { + "legend": { "displayMode": "list", "placement": "right", "showLegend": true }, + "pieType": "pie", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "tooltip": { "mode": "single", "sort": "none" } + }, + "targets": [ + { + "expr": "sum(ollama_tokens_generated_total) by (model)", + "legendFormat": "{{model}}", + "refId": "A" + } + ], + "title": "Tokens by Model", + "type": "piechart" + }, + { + "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { "hideFrom": { "legend": false, "tooltip": false, "viz": false } }, + "mappings": [] + } + }, + "gridPos": { "h": 8, "w": 8, "x": 16, "y": 24 }, + "id": 16, + "options": { + "legend": { "displayMode": "list", "placement": "right", "showLegend": true }, + "pieType": "pie", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "tooltip": { "mode": "single", "sort": "none" } + }, + "targets": [ + { + "expr": "sum(ollama_requests_total{status=\"success\"}) by (endpoint)", + "legendFormat": "{{endpoint}}", + "refId": "A" + } + ], + "title": "Requests by Endpoint", + "type": "piechart" + } + ], + "refresh": "30s", + "schemaVersion": 39, + "tags": ["manacore", "ollama", "llm", "ai"], + "templating": { + "list": [ + { + "current": { "selected": false, "text": "Prometheus", "value": "Prometheus" }, + "hide": 0, + "includeAll": false, + "multi": false, + "name": "datasource", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + } + ] + }, + "time": { "from": "now-1h", "to": "now" }, + "timepicker": {}, + "timezone": "browser", + "title": "Ollama LLM Service", + "uid": "ollama-dashboard", + "version": 1, + "weekStart": "monday" +} diff --git a/services/telegram-project-doc-bot/.env.example b/services/telegram-project-doc-bot/.env.example index 6a57b14a2..ecc6c0fc0 100644 --- a/services/telegram-project-doc-bot/.env.example +++ b/services/telegram-project-doc-bot/.env.example @@ -15,10 +15,15 @@ S3_ACCESS_KEY=minioadmin S3_SECRET_KEY=minioadmin S3_BUCKET=projectdoc-storage -# AI - Transcription (OpenAI Whisper) +# AI - Transcription (STT) +STT_PROVIDER=local # local | openai +STT_LOCAL_URL=http://localhost:3020 # mana-stt service URL +STT_MODEL=whisper # whisper | voxtral + +# OpenAI (optional fallback for STT, required if STT_PROVIDER=openai) OPENAI_API_KEY=sk-your-openai-key # AI - Generation LLM_PROVIDER=ollama # ollama | openai -OLLAMA_URL=http://localhost:11434 +OLLAMA_URL=http://localhost:11435 # Use :11435 for metrics proxy, :11434 for direct OLLAMA_MODEL=gemma3:4b diff --git a/services/telegram-project-doc-bot/src/config/configuration.ts b/services/telegram-project-doc-bot/src/config/configuration.ts index 2ee99c866..467903802 100644 --- a/services/telegram-project-doc-bot/src/config/configuration.ts +++ b/services/telegram-project-doc-bot/src/config/configuration.ts @@ -20,6 +20,11 @@ export default () => ({ openai: { apiKey: process.env.OPENAI_API_KEY, }, + stt: { + provider: process.env.STT_PROVIDER || 'local', // 'local' or 'openai' + localUrl: process.env.STT_LOCAL_URL || 'http://localhost:3020', + model: process.env.STT_MODEL || 'whisper', // 'whisper' or 'voxtral' + }, llm: { provider: process.env.LLM_PROVIDER || 'ollama', ollama: { diff --git a/services/telegram-project-doc-bot/src/database/schema.ts b/services/telegram-project-doc-bot/src/database/schema.ts index 10c2fb2db..371bc9c20 100644 --- a/services/telegram-project-doc-bot/src/database/schema.ts +++ b/services/telegram-project-doc-bot/src/database/schema.ts @@ -1,10 +1,19 @@ -import { pgTable, uuid, text, timestamp, integer, jsonb, boolean } from 'drizzle-orm/pg-core'; +import { + pgTable, + uuid, + text, + timestamp, + integer, + bigint, + jsonb, + boolean, +} from 'drizzle-orm/pg-core'; import { relations } from 'drizzle-orm'; // Projects table export const projects = pgTable('projects', { id: uuid('id').primaryKey().defaultRandom(), - telegramUserId: integer('telegram_user_id').notNull(), + telegramUserId: bigint('telegram_user_id', { mode: 'number' }).notNull(), name: text('name').notNull(), description: text('description'), status: text('status').default('active').notNull(), // active, archived, completed diff --git a/services/telegram-project-doc-bot/src/transcription/transcription.service.ts b/services/telegram-project-doc-bot/src/transcription/transcription.service.ts index 631a7b84a..74d7f5af9 100644 --- a/services/telegram-project-doc-bot/src/transcription/transcription.service.ts +++ b/services/telegram-project-doc-bot/src/transcription/transcription.service.ts @@ -2,48 +2,115 @@ import { Injectable, Logger } from '@nestjs/common'; import { ConfigService } from '@nestjs/config'; import OpenAI from 'openai'; +interface LocalSTTResponse { + text: string; + language?: string; + model: string; +} + @Injectable() export class TranscriptionService { private readonly logger = new Logger(TranscriptionService.name); private readonly openai: OpenAI | null; + private readonly provider: 'local' | 'openai'; + private readonly localUrl: string; + private readonly sttModel: string; constructor(private configService: ConfigService) { + this.provider = this.configService.get('stt.provider', 'local') as 'local' | 'openai'; + this.localUrl = this.configService.get('stt.localUrl', 'http://localhost:3020'); + this.sttModel = this.configService.get('stt.model', 'whisper'); + const apiKey = this.configService.get('openai.apiKey'); if (apiKey) { this.openai = new OpenAI({ apiKey }); - this.logger.log('OpenAI Whisper initialized'); + this.logger.log('OpenAI Whisper available as fallback'); } else { this.openai = null; - this.logger.warn('OpenAI API key not configured - transcription disabled'); } + + this.logger.log( + `STT Provider: ${this.provider}, URL: ${this.localUrl}, Model: ${this.sttModel}` + ); } async transcribe(audioBuffer: Buffer, filename = 'audio.ogg'): Promise { + // Try local STT first if configured + if (this.provider === 'local') { + try { + return await this.transcribeLocal(audioBuffer, filename); + } catch (error) { + this.logger.warn(`Local STT failed, trying OpenAI fallback: ${error}`); + if (this.openai) { + return await this.transcribeOpenAI(audioBuffer, filename); + } + throw error; + } + } + + // Use OpenAI + if (this.openai) { + return await this.transcribeOpenAI(audioBuffer, filename); + } + + throw new Error('No STT provider available'); + } + + private async transcribeLocal(audioBuffer: Buffer, filename: string): Promise { + const endpoint = this.sttModel === 'voxtral' ? '/transcribe/voxtral' : '/transcribe'; + const url = `${this.localUrl}${endpoint}`; + + this.logger.debug(`Calling local STT: ${url}`); + + const formData = new FormData(); + const uint8Array = new Uint8Array(audioBuffer); + const blob = new Blob([uint8Array], { type: 'audio/ogg' }); + formData.append('file', blob, filename); + formData.append('language', 'de'); + + const response = await fetch(url, { + method: 'POST', + body: formData, + }); + + if (!response.ok) { + const error = await response.text(); + throw new Error(`Local STT error: ${response.status} - ${error}`); + } + + const result: LocalSTTResponse = await response.json(); + this.logger.debug(`Local STT result: ${result.text.length} chars, model: ${result.model}`); + + return result.text; + } + + private async transcribeOpenAI(audioBuffer: Buffer, filename: string): Promise { if (!this.openai) { - throw new Error('Transcription not available - OpenAI API key not configured'); + throw new Error('OpenAI not configured'); } try { - // Create a File object from the buffer using Uint8Array const uint8Array = new Uint8Array(audioBuffer); const file = new File([uint8Array], filename, { type: 'audio/ogg' }); const response = await this.openai.audio.transcriptions.create({ file, model: 'whisper-1', - language: 'de', // Default to German, could be made configurable + language: 'de', }); - this.logger.debug(`Transcribed ${audioBuffer.length} bytes -> ${response.text.length} chars`); + this.logger.debug( + `OpenAI transcribed ${audioBuffer.length} bytes -> ${response.text.length} chars` + ); return response.text; } catch (error) { - this.logger.error('Transcription failed:', error); + this.logger.error('OpenAI transcription failed:', error); throw new Error('Transkription fehlgeschlagen'); } } isAvailable(): boolean { - return this.openai !== null; + return this.provider === 'local' || this.openai !== null; } }