From 1038faf7b42d5ef3557315adf65355b2b7cea9b1 Mon Sep 17 00:00:00 2001 From: Evan Baker Date: Tue, 1 Oct 2024 15:36:14 +0000 Subject: [PATCH] adds basic infiniband grafana dashboard Signed-off-by: Evan Baker --- .../legacy/grafana/dashboards/infiniband.json | 746 ++++++++++++++++++ 1 file changed, 746 insertions(+) create mode 100644 deploy/legacy/grafana/dashboards/infiniband.json diff --git a/deploy/legacy/grafana/dashboards/infiniband.json b/deploy/legacy/grafana/dashboards/infiniband.json new file mode 100644 index 0000000000..15ab5d07a7 --- /dev/null +++ b/deploy/legacy/grafana/dashboards/infiniband.json @@ -0,0 +1,746 @@ +{ + "__elements": {}, + "__inputs": [], + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "11.2.0" + }, + { + "id": "prometheus", + "name": "Prometheus", + "type": "datasource", + "version": "1.0.0" + }, + { + "id": "state-timeline", + "name": "State timeline", + "type": "panel", + "version": "" + }, + { + "id": "table", + "name": "Table", + "type": "panel", + "version": "" + }, + { + "id": "timeseries", + "name": "Time series", + "type": "panel", + "version": "" + } + ], + "description": "Retina infiniband metrics", + "editable": true, + "id": null, + "links": [], + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "filterable": true, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 7, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "disableTextWrap": false, + "editorMode": "builder", + "exemplar": false, + "expr": "node_infiniband_info{device=\"$ibdev\"}", + "format": "table", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": true, + "interval": "", + "legendFormat": "__auto", + "range": false, + "refId": "A", + "useBackend": false + } + ], + "title": "Infiniband Metadata", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Value": true, + "__name__": true, + "container": true, + "endpoint": true, + "job": true, + "namespace": true, + "pod": true, + "service": true + }, + "includeByName": {}, + "indexByName": { + "Time": 5, + "Value": 13, + "__name__": 6, + "board_id": 2, + "container": 7, + "device": 1, + "endpoint": 8, + "firmware_version": 3, + "hca_type": 4, + "instance": 0, + "job": 9, + "namespace": 10, + "pod": 11, + "service": 12 + }, + "renameByName": {} + } + } + ], + "type": "table" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "fillOpacity": 70, + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineWidth": 0, + "spanNulls": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "yellow", + "value": null + }, + { + "color": "red", + "value": 0 + }, + { + "color": "green", + "value": 1.0002 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 8 + }, + "id": 6, + "options": { + "alignValue": "left", + "legend": { + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "mergeValues": true, + "rowHeight": 0.9, + "showValue": "never", + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "node_infiniband_physical_state_id{device=\"$ibdev\"}", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "interval": "", + "legendFormat": "PHY {{instance}} {{device}}", + "range": true, + "refId": "A", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "node_infiniband_state_id{device=\"$ibdev\"}", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": false, + "interval": "", + "legendFormat": "LINK {{instance}} {{device}}", + "range": true, + "refId": "B", + "useBackend": false + } + ], + "title": "Physical/Link State", + "type": "state-timeline" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 8 + }, + "id": 1, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "node_infiniband_port_discards_transmitted_total{device=\"$ibdev\"}", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "Port Discard {{instance}} {{device}}", + "range": true, + "refId": "A", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "node_infiniband_vl15_dropped_total{device=\"$ibdev\"}", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "VL15 Drops {{instance}} {{device}}", + "range": true, + "refId": "B", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "node_infiniband_port_constraint_errors_received_total{device=\"$ibdev\"}", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "Port Constrain RX {{instance}} {{device}}", + "range": true, + "refId": "C", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "node_infiniband_port_constraint_errors_transmitted_total{device=\"$ibdev\"}", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "Port Constrain TX {{instance}} {{device}}", + "range": true, + "refId": "D", + "useBackend": false + } + ], + "title": "Congestion Drops/Discards", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 16 + }, + "id": 8, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "node_infiniband_port_data_transmitted_bytes_total{device=\"$ibdev\"}", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "interval": "", + "legendFormat": "TX {{instance}} {{device}}", + "range": true, + "refId": "A", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "node_infiniband_port_data_received_bytes_total{device=\"$ibdev\"}", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "RX {{instance}} {{device}}", + "range": true, + "refId": "B", + "useBackend": false + } + ], + "title": "RX/TX Data", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 16 + }, + "id": 4, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "node_infiniband_port_packets_received_total{device=\"$ibdev\"}", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "RX {{instance}} {{device}}", + "range": true, + "refId": "A", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "node_infiniband_port_packets_transmitted_total{device=\"$ibdev\"}", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "TX {{instance}} {{device}}", + "range": true, + "refId": "B", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "node_infiniband_unicast_packets_received_total{device=\"$ibdev\"}", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "RX UNI {{instance}} {{device}}", + "range": true, + "refId": "C", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "node_infiniband_unicast_packets_transmitted_total{device=\"$ibdev\"}", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "TX UNI {{instance}} {{device}}", + "range": true, + "refId": "D", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "node_infiniband_multicast_packets_received_total{device=\"$ibdev\"}", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "RX MULTI {{instance}} {{device}}", + "range": true, + "refId": "E", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "node_infiniband_multicast_packets_transmitted_total{device=\"$ibdev\"}", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "TX MULTI {{instance}} {{device}}", + "range": true, + "refId": "F", + "useBackend": false + } + ], + "title": "RX/TX Packets", + "type": "timeseries" + } + ], + "schemaVersion": 39, + "tags": [], + "templating": { + "list": [ + { + "current": {}, + "description": "The Infiniband Device name(s). Usually mlx5_1", + "hide": 0, + "label": "Inifiniband Device Name", + "name": "ibdev", + "options": [ + { + "selected": true, + "text": "mlx5_1", + "value": "mlx5_1" + } + ], + "query": "mlx5_1", + "skipUrlSync": false, + "type": "textbox" + } + ] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timepicker": {}, + "timezone": "browser", + "title": "Infiniband", + "uid": "bdzi2qzehm8zkd", + "version": 10 +}