diff --git a/config/grafana/grafana_dashboard.json b/config/grafana/grafana_dashboard.json deleted file mode 100644 index 553e60ec1a1..00000000000 --- a/config/grafana/grafana_dashboard.json +++ /dev/null @@ -1,2404 +0,0 @@ -{ - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": { - "type": "datasource", - "uid": "grafana" - }, - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "target": { - "limit": 100, - "matchAny": false, - "tags": [], - "type": "dashboard" - }, - "type": "dashboard" - } - ] - }, - "description": "", - "editable": true, - "fiscalYearStartMonth": 0, - "gnetId": 16032, - "graphTooltip": 0, - "id": 3, - "links": [], - "liveNow": false, - "panels": [ - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Warm Pool (total-assigned)" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "orange", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Assigned IPs" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "yellow", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Total IPs Remaining" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "green", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 0 - }, - "id": 94, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "9.1.0", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "editorMode": "code", - "expr": "awscni_total_ip_addresses-awscni_assigned_ip_addresses", - "instant": false, - "interval": "", - "legendFormat": "{{node}}", - "refId": "B" - } - ], - "title": "Warm Pool per Node", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 12, - "y": 0 - }, - "id": 51, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "pluginVersion": "9.1.0", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "editorMode": "code", - "expr": "sum(awscni_assigned_ip_addresses -2)", - "format": "time_series", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "title": "# of Busybox Pods", - "transformations": [ - { - "id": "filterFieldsByName", - "options": { - "include": { - "names": [ - "sum(awscni_assigned_ip_addresses -2)" - ] - } - } - } - ], - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "description": "\n\nnormal pods + preemptible pods should match\nthe assigned ip addresses count,\nmodulo pods using the host network", - "fieldConfig": { - "defaults": { - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 16, - "y": 0 - }, - "id": 83, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "pluginVersion": "9.1.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "editorMode": "builder", - "expr": "count(awscni_eni_max{k8s_app=\"aws-node\"})", - "hide": false, - "legendFormat": "__auto", - "range": true, - "refId": "A" - } - ], - "title": "AWS Nodes", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "NdWXluq4z" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 20, - "y": 0 - }, - "id": 76, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "pluginVersion": "9.1.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "NdWXluq4z" - }, - "editorMode": "code", - "expr": "max(max_over_time(awscni_total_ip_addresses[30m])-(max_over_time(awscni_assigned_ip_addresses[30m])))", - "hide": false, - "legendFormat": "__auto", - "range": true, - "refId": "A" - } - ], - "title": "Max Net IP Requests[30m]", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "NdWXluq4z" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 4 - }, - "id": 103, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "9.1.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "NdWXluq4z" - }, - "editorMode": "code", - "expr": "delta(awscni_assigned_ip_addresses[24h])", - "hide": false, - "legendFormat": "{{node}}", - "range": true, - "refId": "A" - } - ], - "title": "Net IP Requests[24h]", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Warm Pool (total-assigned)" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "orange", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Assigned IPs" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "yellow", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Total IPs Remaining" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "green", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 4, - "w": 12, - "x": 0, - "y": 8 - }, - "id": 80, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "pluginVersion": "9.1.0", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "editorMode": "code", - "expr": "awscni_total_ip_addresses-awscni_assigned_ip_addresses", - "instant": false, - "interval": "", - "legendFormat": "{{node}}", - "refId": "B" - } - ], - "title": "Warm Pool per Node", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "NdWXluq4z" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 0, - "y": 12 - }, - "id": 97, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "pluginVersion": "9.1.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "NdWXluq4z" - }, - "editorMode": "code", - "expr": "max_over_time(awscni_total_ip_addresses[30m])-max_over_time(awscni_assigned_ip_addresses[30m])", - "hide": false, - "legendFormat": "__auto", - "range": true, - "refId": "A" - } - ], - "title": "Max Net IP Requests[30m]", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "NdWXluq4z" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 4, - "y": 12 - }, - "id": 102, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "pluginVersion": "9.1.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "NdWXluq4z" - }, - "editorMode": "code", - "expr": "delta(awscni_assigned_ip_addresses[24h])", - "hide": false, - "legendFormat": "__auto", - "range": true, - "refId": "A" - } - ], - "title": "Net IP Requests[24h]", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "NdWXluq4z" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 8, - "y": 12 - }, - "id": 101, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "pluginVersion": "9.1.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "NdWXluq4z" - }, - "editorMode": "code", - "expr": "stddev_over_time(awscni_assigned_ip_addresses[24h])", - "hide": false, - "legendFormat": "__auto", - "range": true, - "refId": "A" - } - ], - "title": "StdDev Net IP Requests[24h]", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "description": "Warm: total - assigned, already allocated and available\n\nCold: max - total, could be allocated", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - } - }, - "mappings": [], - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Warm Pool (total-assigned)" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "orange", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Assigned IPs" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "yellow", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Total IPs Remaining per Node" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "green", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 6, - "x": 12, - "y": 12 - }, - "id": 59, - "options": { - "displayLabels": [], - "legend": { - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "values": [ - "value", - "percent" - ] - }, - "pieType": "pie", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "9.1.0", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "editorMode": "code", - "expr": "avg(awscni_total_ip_addresses-awscni_assigned_ip_addresses)", - "instant": false, - "interval": "", - "legendFormat": "Warm Pool (total-assigned)", - "refId": "B" - }, - { - "datasource": { - "uid": "$datasource" - }, - "editorMode": "code", - "expr": "avg(awscni_ip_max-awscni_total_ip_addresses)", - "instant": false, - "interval": "", - "legendFormat": "Total IPs Remaining per Node", - "refId": "A" - }, - { - "datasource": { - "uid": "$datasource" - }, - "editorMode": "code", - "expr": "avg(awscni_assigned_ip_addresses)", - "hide": false, - "instant": false, - "interval": "", - "legendFormat": "Assigned IPs", - "refId": "C" - } - ], - "title": "Average IP Allocation per Node", - "type": "piechart" - }, - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "description": "Warm: total - assigned, already allocated and available\n\nCold: max - total, could be allocated", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - } - }, - "mappings": [], - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Warm Pool (total-assigned)" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "orange", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Assigned IPs" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "yellow", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Total IPs Remaining" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "green", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 6, - "x": 18, - "y": 12 - }, - "id": 95, - "options": { - "displayLabels": [], - "legend": { - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "values": [ - "value", - "percent" - ] - }, - "pieType": "pie", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "9.1.0", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "editorMode": "code", - "expr": "sum(awscni_total_ip_addresses-awscni_assigned_ip_addresses)", - "instant": false, - "interval": "", - "legendFormat": "Warm Pool (total-assigned)", - "refId": "B" - }, - { - "datasource": { - "uid": "$datasource" - }, - "editorMode": "code", - "expr": "sum(awscni_ip_max-awscni_total_ip_addresses)", - "instant": false, - "interval": "", - "legendFormat": "Total IPs Remaining", - "refId": "A" - }, - { - "datasource": { - "uid": "$datasource" - }, - "editorMode": "code", - "expr": "sum(awscni_assigned_ip_addresses)", - "hide": false, - "instant": false, - "interval": "", - "legendFormat": "Assigned IPs", - "refId": "C" - } - ], - "title": "Cluster IP Allocation", - "type": "piechart" - }, - { - "datasource": { - "type": "prometheus", - "uid": "NdWXluq4z" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 17 - }, - "id": 61, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "NdWXluq4z" - }, - "editorMode": "builder", - "expr": "delta(awscni_err_no_avail_addrs{k8s_app=\"aws-node\"}[5m])", - "legendFormat": "{{node}}", - "range": true, - "refId": "A" - } - ], - "title": "No Available Addresses Error", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Assigned IPs" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "yellow", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Warm Pool (total-assigned)" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "orange", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "IP Max per Cluster" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "green", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Total IPs" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "dark-orange", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 22 - }, - "id": 6, - "links": [], - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "8.3.2", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "editorMode": "code", - "expr": "sum(awscni_assigned_ip_addresses)", - "interval": "", - "legendFormat": "Assigned IPs", - "range": true, - "refId": "C" - }, - { - "datasource": { - "uid": "$datasource" - }, - "editorMode": "code", - "expr": "sum(awscni_ip_max)", - "interval": "", - "legendFormat": "IP Max per Cluster", - "range": true, - "refId": "B" - }, - { - "datasource": { - "uid": "$datasource" - }, - "editorMode": "code", - "expr": "sum(awscni_total_ip_addresses-awscni_assigned_ip_addresses)", - "hide": false, - "interval": "", - "legendFormat": "Warm Pool (total-assigned)", - "range": true, - "refId": "D" - }, - { - "datasource": { - "uid": "$datasource" - }, - "editorMode": "code", - "expr": "sum(awscni_total_ip_addresses)", - "hide": false, - "interval": "", - "legendFormat": "Total IPs", - "range": true, - "refId": "A" - } - ], - "title": "IP Addresses in the Cluster", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "description": "prometheus-kube-state-metrics, grafana, prometheus-server, prometheus-pushgateway, and coredns are all not host-network", - "fieldConfig": { - "defaults": { - "custom": { - "align": "auto", - "displayMode": "auto", - "inspect": false - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 8, - "x": 0, - "y": 25 - }, - "id": 40, - "options": { - "footer": { - "fields": "", - "reducer": [ - "sum" - ], - "show": false - }, - "showHeader": true - }, - "pluginVersion": "9.1.0", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "editorMode": "code", - "expr": "awscni_assigned_ip_addresses\n", - "format": "table", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "title": "All Assigned IP Addresses", - "transformations": [ - { - "id": "filterFieldsByName", - "options": { - "include": { - "names": [ - "instance", - "pod", - "Value" - ] - } - } - }, - { - "id": "organize", - "options": { - "excludeByName": {}, - "indexByName": {}, - "renameByName": { - "Value": "Assigned IP Addresses" - } - } - } - ], - "type": "table" - }, - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "description": "\n\nCold: max - total, could be allocated", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Warm" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "yellow", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "avg(awscni_add_warm_ip_req_count{k8s_app=\"aws-node\"})" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "blue", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 8, - "y": 25 - }, - "id": 67, - "maxDataPoints": 1, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "value" - }, - "pluginVersion": "9.1.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "editorMode": "builder", - "expr": "awscni_eni_allocated", - "hide": false, - "legendFormat": "__auto", - "range": true, - "refId": "A" - } - ], - "title": "ENIs Allocated per Node", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "NdWXluq4z" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 6, - "x": 12, - "y": 29 - }, - "id": 55, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "NdWXluq4z" - }, - "editorMode": "builder", - "expr": "rate(awscni_add_ip_req_count{k8s_app=\"aws-node\"}[5m])", - "hide": false, - "legendFormat": "{{node}}", - "range": true, - "refId": "B" - } - ], - "title": "IP Allocation Request per Node [5m]", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "NdWXluq4z" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 6, - "x": 18, - "y": 29 - }, - "id": 98, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "NdWXluq4z" - }, - "editorMode": "builder", - "expr": "delta(awscni_del_ip_req_count{k8s_app=\"aws-node\"}[5m])", - "hide": false, - "legendFormat": "{{node}}", - "range": true, - "refId": "B" - } - ], - "title": "IP Deallocation Request per Node [5m]", - "type": "timeseries" - }, - { - "datasource": { - "uid": "$datasource" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "points", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 9, - "w": 8, - "x": 0, - "y": 30 - }, - "id": 14, - "links": [], - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "8.3.2", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "sum by (fn) (awscni_ipamd_action_inprogress)", - "interval": "", - "legendFormat": "{{fn}}", - "refId": "A" - } - ], - "title": "IPAMD Actions in Progress", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "description": "Warm: total - assigned, already allocated and available\n\nCold: max - total, could be allocated", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Warm" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "yellow", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "avg(awscni_add_warm_ip_req_count{k8s_app=\"aws-node\"})" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "blue", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 3, - "w": 4, - "x": 8, - "y": 30 - }, - "id": 82, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "pluginVersion": "9.1.0", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "editorMode": "code", - "expr": "count(1 > awscni_eni_util{k8s_app=\"aws-node\"})", - "format": "table", - "hide": false, - "instant": false, - "interval": "", - "legendFormat": "__auto", - "refId": "C" - } - ], - "title": "Empty ENIs per Cluster", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "description": "Warm: total - assigned, already allocated and available\n\nCold: max - total, could be allocated", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Warm" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "yellow", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "avg(awscni_add_warm_ip_req_count{k8s_app=\"aws-node\"})" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "blue", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 3, - "w": 4, - "x": 8, - "y": 33 - }, - "id": 74, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "pluginVersion": "9.1.0", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "editorMode": "code", - "expr": "awscni_eni_max", - "format": "table", - "hide": false, - "instant": false, - "interval": "", - "legendFormat": "__auto", - "refId": "C" - } - ], - "title": "ENI Max per Node", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "description": "Warm: total - assigned, already allocated and available\n\nCold: max - total, could be allocated", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Warm" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "yellow", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "avg(awscni_add_warm_ip_req_count{k8s_app=\"aws-node\"})" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "blue", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 3, - "w": 4, - "x": 8, - "y": 36 - }, - "id": 66, - "maxDataPoints": 1, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "value" - }, - "pluginVersion": "9.1.0", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "editorMode": "code", - "expr": "avg(awscni_ip_max / awscni_eni_max)", - "hide": false, - "instant": false, - "interval": "", - "legendFormat": "__auto", - "refId": "C" - } - ], - "title": "Max IPs per ENI", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "NdWXluq4z" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "displayName": "${__field.labels.node}", - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 36 - }, - "id": 65, - "options": { - "displayMode": "gradient", - "minVizHeight": 10, - "minVizWidth": 0, - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showUnfilled": true - }, - "pluginVersion": "9.1.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "NdWXluq4z" - }, - "editorMode": "builder", - "expr": "awscni_eni_util", - "hide": false, - "legendFormat": "__auto", - "range": true, - "refId": "A" - } - ], - "title": "# of IPs per ENI (Labeled by Node IP) - Correlate with Allocated ENIs for Delay", - "type": "bargauge" - }, - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "points", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineStyle": { - "fill": "solid" - }, - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 39 - }, - "id": 4, - "links": [], - "options": { - "legend": { - "calcs": [ - "lastNotNull", - "max" - ], - "displayMode": "table", - "placement": "right", - "showLegend": true, - "sortBy": "Max", - "sortDesc": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "8.3.2", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "NdWXluq4z" - }, - "editorMode": "code", - "exemplar": true, - "expr": "avg by (api)(rate(awscni_aws_api_latency_ms_sum[5m])/rate(awscni_aws_api_latency_ms_count[5m]))", - "interval": "", - "legendFormat": "{{api}}", - "range": true, - "refId": "A" - } - ], - "title": "Average AWS API Latency [5m]", - "type": "timeseries" - } - ], - "refresh": false, - "schemaVersion": 37, - "style": "dark", - "tags": [], - "templating": { - "list": [ - { - "current": { - "selected": false, - "text": "Prometheus", - "value": "Prometheus" - }, - "hide": 0, - "includeAll": false, - "multi": false, - "name": "datasource", - "options": [], - "query": "prometheus", - "queryValue": "", - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "type": "datasource" - }, - { - "current": { - "isNone": true, - "selected": false, - "text": "None", - "value": "" - }, - "datasource": { - "uid": "$datasource" - }, - "definition": "label_values(awscni_assigned_ip_addresses{job=\"aws-cni-metrics\"}, instance)", - "hide": 2, - "includeAll": false, - "label": "", - "multi": false, - "name": "instance", - "options": [], - "query": { - "query": "label_values(awscni_assigned_ip_addresses{job=\"aws-cni-metrics\"}, instance)", - "refId": "Prometheus-instance-Variable-Query" - }, - "refresh": 2, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "current": { - "isNone": true, - "selected": false, - "text": "None", - "value": "" - }, - "datasource": { - "uid": "$datasource" - }, - "definition": "label_values(awscni_del_ip_req_count{job=\"aws-cni-metrics\"}, reason)", - "hide": 2, - "includeAll": false, - "multi": false, - "name": "ip_deletion_reason", - "options": [], - "query": { - "query": "label_values(awscni_del_ip_req_count{job=\"aws-cni-metrics\"}, reason)", - "refId": "Prometheus-ip_deletion_reason-Variable-Query" - }, - "refresh": 2, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "datasource": { - "type": "prometheus", - "uid": "NdWXluq4z" - }, - "filters": [], - "hide": 0, - "name": "Filters", - "skipUrlSync": false, - "type": "adhoc" - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "hidden": false, - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ] - }, - "timezone": "", - "title": "VPC CNI IPAMD Warm Pool Testing", - "uid": "p7lNf06Vk", - "version": 115, - "weekStart": "" -} \ No newline at end of file diff --git a/pkg/ipamd/datastore/data_store.go b/pkg/ipamd/datastore/data_store.go index 56412562878..8adc2965ab4 100644 --- a/pkg/ipamd/datastore/data_store.go +++ b/pkg/ipamd/datastore/data_store.go @@ -122,18 +122,18 @@ var ( }, []string{"cidr"}, ) - noAvailableAddrs = prometheus.NewCounter( + noAvailableIPAddrs = prometheus.NewCounter( prometheus.CounterOpts{ Name: "awscni_err_no_avail_addrs", - Help: "The number of IP/Prefix assignments that fail due to no available addresses at the ENI level", + Help: "The number of pod IP assignments that fail due to no available IP addresses", }, ) - eniUtilization = prometheus.NewGaugeVec( + eniIPsInUse = prometheus.NewGaugeVec( prometheus.GaugeOpts{ Name: "awscni_eni_util", Help: "The number of allocated ips partitioned by eni", }, - []string{"fn"}, + []string{"eni"}, ) prometheusRegistered = false ) @@ -357,8 +357,8 @@ func prometheusRegister() { prometheus.MustRegister(forceRemovedIPs) prometheus.MustRegister(totalPrefixes) prometheus.MustRegister(ipsPerCidr) - prometheus.MustRegister(noAvailableAddrs) - prometheus.MustRegister(eniUtilization) + prometheus.MustRegister(noAvailableIPAddrs) + prometheus.MustRegister(eniIPsInUse) prometheusRegistered = true } } @@ -452,6 +452,8 @@ func (ds *DataStore) ReadBackingStore(isv6Enabled bool) error { cidr.IPAddresses[ipAddr.String()] = addr ds.assignPodIPAddressUnsafe(addr, allocation.IPAMKey, allocation.Metadata, time.Unix(0, allocation.AllocationTimestamp)) ds.log.Debugf("Recovered %s => %s/%s", allocation.IPAMKey, eni.ID, addr.Address) + // Increment ENI IP usage upon finding assigned ips + eniIPsInUse.WithLabelValues(eni.ID).Inc() // Update prometheus for ips per cidr // Secondary IP mode will have /32:1 and Prefix mode will have /28: ipsPerCidr.With(prometheus.Labels{"cidr": cidr.Cidr.String()}).Inc() @@ -536,8 +538,9 @@ func (ds *DataStore) AddENI(eniID string, deviceNumber int, isPrimary, isTrunk, DeviceNumber: deviceNumber, AvailableIPv4Cidrs: make(map[string]*CidrInfo)} - ds.GetENIUtilization() enis.Set(float64(len(ds.eniPool))) + // Initialize ENI IPs In Use to 0 when an ENI is created + eniIPsInUse.WithLabelValues(eniID).Set(0) return nil } @@ -727,10 +730,12 @@ func (ds *DataStore) AssignPodIPv6Address(ipamKey IPAMKey, ipamMetadata IPAMMeta delete(V6Cidr.IPAddresses, addr.Address) return "", -1, err } + // Increment ENI IP usage on pod IPv6 allocation + eniIPsInUse.WithLabelValues(eni.ID).Inc() return addr.Address, eni.DeviceNumber, nil } } - noAvailableAddrs.Inc() + noAvailableIPAddrs.Inc() return "", -1, errors.New("assignPodIPv6AddressUnsafe: no available IP addresses") } @@ -793,12 +798,14 @@ func (ds *DataStore) AssignPodIPv4Address(ipamKey IPAMKey, ipamMetadata IPAMMeta ipsPerCidr.With(prometheus.Labels{"cidr": availableCidr.Cidr.String()}).Dec() return "", -1, err } + // Increment ENI IP usage on pod IPv4 allocation + eniIPsInUse.WithLabelValues(eni.ID).Inc() return addr.Address, eni.DeviceNumber, nil } ds.log.Debugf("AssignPodIPv4Address: ENI %s does not have available addresses", eni.ID) } - noAvailableAddrs.Inc() + noAvailableIPAddrs.Inc() ds.log.Errorf("DataStore has no available IP/Prefix addresses") return "", -1, errors.New("assignPodIPv4AddressUnsafe: no available IP/Prefix addresses") } @@ -815,7 +822,6 @@ func (ds *DataStore) assignPodIPAddressUnsafe(addr *AddressInfo, ipamKey IPAMKey addr.IPAMMetadata = ipamMetadata addr.AssignedTime = assignedTime - ds.log.Debugf("IP allocation request") ds.assigned++ // Prometheus gauge assignedIPs.Set(float64(ds.assigned)) @@ -832,7 +838,6 @@ func (ds *DataStore) unassignPodIPAddressUnsafe(addr *AddressInfo) { addr.IPAMKey = IPAMKey{} // unassign the addr addr.IPAMMetadata = IPAMMetadata{} ds.assigned-- - ds.log.Debugf("IP deallocation request") // Prometheus gauge assignedIPs.Set(float64(ds.assigned)) } @@ -886,24 +891,6 @@ func (ds *DataStore) GetIPStats(addressFamily string) *DataStoreStats { return stats } -// GetENIUtilization updates a Prometheus gauge vector with each ENIs id and how many ip addresses are assigned on it -func (ds *DataStore) GetENIUtilization() { - //eniUtilization.Reset() - for _, eni := range ds.eniPool { - count := 0 - for _, assignedAddr := range eni.AvailableIPv4Cidrs { - for _, addr := range assignedAddr.IPAddresses { - if addr.Assigned() { - count += 1 - } - } - } - utilization := count - eniID := eni.ID - eniUtilization.WithLabelValues(eniID).Set(float64(utilization)) - } -} - // GetTrunkENI returns the trunk ENI ID or an empty string func (ds *DataStore) GetTrunkENI() string { ds.lock.Lock() @@ -1110,7 +1097,8 @@ func (ds *DataStore) RemoveUnusedENIFromStore(warmIPTarget, minimumIPTarget, war // Prometheus update enis.Set(float64(len(ds.eniPool))) - ds.GetENIUtilization() + // Delete ENI IPs In Use when ENI is removed + eniIPsInUse.DeleteLabelValues(removableENI) totalIPs.Set(float64(ds.total)) return removableENI } @@ -1165,7 +1153,8 @@ func (ds *DataStore) RemoveENIFromDataStore(eniID string, force bool) error { // Prometheus gauge enis.Set(float64(len(ds.eniPool))) - ds.GetENIUtilization() + // Delete ENI IPs In Use when ENI is removed + eniIPsInUse.DeleteLabelValues(eniID) return nil } @@ -1206,6 +1195,8 @@ func (ds *DataStore) UnassignPodIPAddress(ipamKey IPAMKey) (e *ENI, ip string, d ipsPerCidr.With(prometheus.Labels{"cidr": availableCidr.Cidr.String()}).Dec() ds.log.Infof("UnassignPodIPAddress: sandbox %s's ipAddr %s, DeviceNumber %d", ipamKey, addr.Address, eni.DeviceNumber) + // Decrement ENI IP usage when a pod is deallocated + eniIPsInUse.WithLabelValues(eni.ID).Dec() return eni, addr.Address, eni.DeviceNumber, nil } diff --git a/pkg/ipamd/ipamd.go b/pkg/ipamd/ipamd.go index e6dc6679012..626e8ef9ee0 100644 --- a/pkg/ipamd/ipamd.go +++ b/pkg/ipamd/ipamd.go @@ -679,8 +679,6 @@ func (c *IPAMContext) updateIPPoolIfRequired(ctx context.Context) { if c.shouldRemoveExtraENIs() { c.tryFreeENI() } - // Prometheus Metric - c.dataStore.GetENIUtilization() } // decreaseDatastorePool runs every `interval` and attempts to return unused ENIs and IPs diff --git a/test/integration/warm-pool/clear_warm_env.go b/test/integration/warm-pool/clear_warm_env.go deleted file mode 100644 index d2774bea26a..00000000000 --- a/test/integration/warm-pool/clear_warm_env.go +++ /dev/null @@ -1,26 +0,0 @@ -package warm_pool - -import ( - k8sUtils "github.com/aws/amazon-vpc-cni-k8s/test/framework/resources/k8s/utils" - . "github.com/onsi/ginkgo/v2" -) - -// Environment variables are not reset before and after each test so that way multiple tests can be run to -// evaluate behavior. You can run this test which will unset all warm pool environment variables. Or, if you -// want to test the behavior with some of those environment variables set, alter them in that file and run it once before -// you run the desired tests. -var _ = Describe("clear warm env", func() { - Context("Clear out environment variables for warm pool for testing", func() { - - It("Unsetting env variables", func() { - k8sUtils.UpdateEnvVarOnDaemonSetAndWaitUntilReady(f, "aws-node", "kube-system", - "aws-node", map[string]string{}, - map[string]struct{}{ - "WARM_ENI_TARGET": {}, - "WARM_IP_TARGET": {}, - "MINIMUM_IP_TARGET": {}, - "WARM_PREFIX_TARGET": {}, - }) - }) - }) -}) diff --git a/test/integration/warm-pool/set_warm_env.go b/test/integration/warm-pool/set_warm_env.go deleted file mode 100644 index e46be8ecdfc..00000000000 --- a/test/integration/warm-pool/set_warm_env.go +++ /dev/null @@ -1,26 +0,0 @@ -package warm_pool - -import ( - k8sUtils "github.com/aws/amazon-vpc-cni-k8s/test/framework/resources/k8s/utils" - "github.com/aws/amazon-vpc-cni-k8s/test/framework/utils" - . "github.com/onsi/ginkgo/v2" - "strconv" -) - -// Environment variables are not reset before and after each test so that way multiple tests can be run to -// evaluate behavior. You can run this test which will unset all warm pool environment variables. Or, if you -// want to test the behavior with some of those environment variables set, alter them in that file and run it once before -// you run the desired tests. -var _ = Describe("set warm env", func() { - Context("Sets env variables", func() { - - It("Sets env variables", func() { - k8sUtils.AddEnvVarToDaemonSetAndWaitTillUpdated(f, - utils.AwsNodeName, utils.AwsNodeNamespace, utils.AwsNodeName, - map[string]string{ - "WARM_IP_TARGET": strconv.Itoa(0), - "ENABLE_DYNAMIC_WARM_POOL": strconv.FormatBool(true), - }) - }) - }) -}) diff --git a/test/integration/warm-pool/use_case_1_test.go b/test/integration/warm-pool/use_case_1_test.go deleted file mode 100644 index 5c985745adc..00000000000 --- a/test/integration/warm-pool/use_case_1_test.go +++ /dev/null @@ -1,98 +0,0 @@ -package warm_pool - -import ( - "fmt" - "github.com/aws/amazon-vpc-cni-k8s/test/framework/resources/k8s/manifest" - "github.com/aws/amazon-vpc-cni-k8s/test/framework/utils" - . "github.com/onsi/ginkgo/v2" - . "github.com/onsi/gomega" - v1 "k8s.io/api/core/v1" - "time" -) - -var primaryNode v1.Node - -// This test scales up the cluster to maxPods, then scales it back down to minPods. -var _ = Describe("use case 1", func() { - Context("Quick Scale Up and Down", func() { - - BeforeEach(func() { - By("Getting Warm Pool Environment Variables Before Test") - getWarmPoolEnvVars() - }) - - It("Scales the cluster and checks warm pool before and after", func() { - fmt.Fprintf(GinkgoWriter, "Deploying %v minimum pods\n", minPods) - - start := time.Now().Unix() - - fmt.Fprintf(GinkgoWriter, "Scaling cluster up to %v pods\n", minPods) - deploymentSpec := manifest.NewBusyBoxDeploymentBuilder(f.Options.TestImageRegistry). - Namespace("default"). - Name("busybox"). - NodeName(primaryNode.Name). - Namespace(utils.DefaultTestNamespace). - Replicas(minPods). - Build() - - _, err := f.K8sResourceManagers. - DeploymentManager(). - CreateAndWaitTillDeploymentIsReady(deploymentSpec, utils.DefaultDeploymentReadyTimeout*5) - Expect(err).ToNot(HaveOccurred()) - - if minPods != 0 { - time.Sleep(sleep) - } - - fmt.Fprintf(GinkgoWriter, "Scaling cluster up to %v pods\n", maxPods) - quickScale(maxPods) - - Expect(maxPods).To(Equal(busyboxPodCnt())) - - fmt.Fprintf(GinkgoWriter, "Scaling cluster down to %v pods\n", minPods) - quickScale(minPods) - - end := time.Now().Unix() - - fmt.Fprintf(GinkgoWriter, fmt.Sprintf("Start Time: %v\n", start)) - fmt.Fprintf(GinkgoWriter, fmt.Sprintf("End Time: %v\n", end)) - - By("Starting Curl Container") - curlContainer := manifest.NewCurlContainer(). - Command([]string{"sleep", "1000"}).Build() - - getCurlPod := manifest.NewDefaultPodBuilder(). - Name("curl-pod"). - Namespace(utils.DefaultTestNamespace). - NodeName(primaryNode.Name). - HostNetwork(true). - Container(curlContainer). - Build() - - testPod, err := f.K8sResourceManagers.PodManager(). - CreateAndWaitTillPodCompleted(getCurlPod) - - logs, errLogs := f.K8sResourceManagers.PodManager(). - PodLogs(testPod.Namespace, testPod.Name) - Expect(errLogs).ToNot(HaveOccurred()) - fmt.Fprintln(GinkgoWriter, logs) - - By("Fetching metrics via Curl Container") - getMetrics(start, end) - - By("Deleting the deployment") - err = f.K8sResourceManagers.DeploymentManager().DeleteAndWaitTillDeploymentIsDeleted(deploymentSpec) - Expect(err).NotTo(HaveOccurred()) - - By("Deleting Curl Container") - err = f.K8sResourceManagers.PodManager().DeleteAndWaitTillPodDeleted(getCurlPod) - Expect(err).NotTo(HaveOccurred()) - }) - - AfterEach(func() { - By("Getting Warm Pool Environment Variables After Test") - getWarmPoolEnvVars() - - }) - }) -}) diff --git a/test/integration/warm-pool/use_case_2_test.go b/test/integration/warm-pool/use_case_2_test.go deleted file mode 100644 index de26bd1938d..00000000000 --- a/test/integration/warm-pool/use_case_2_test.go +++ /dev/null @@ -1,104 +0,0 @@ -package warm_pool - -import ( - "fmt" - "github.com/aws/amazon-vpc-cni-k8s/test/framework/resources/k8s/manifest" - "github.com/aws/amazon-vpc-cni-k8s/test/framework/utils" - "strconv" - "time" - - . "github.com/onsi/ginkgo/v2" - . "github.com/onsi/gomega" -) - -// This test replicates sawtooth behavior by adding a fixed amount of pods and removing the same fixed amount of pods -// over a preset number of iterations. -var _ = Describe("use case 2", func() { - Context("Sawtooth Fixed Add and Subtract", func() { - - BeforeEach(func() { - By("Getting Warm Pool Environment Variables Before Test") - getWarmPoolEnvVars() - }) - - It("Scales the cluster and checks warm pool before and after", func() { - replicas := minPods - - start := time.Now().Unix() - - fmt.Fprintf(GinkgoWriter, "Deploying %v minimum pods\n", minPods) - deploymentSpec := manifest.NewBusyBoxDeploymentBuilder(f.Options.TestImageRegistry). - Namespace("default"). - Name("busybox"). - NodeName(primaryNode.Name). - Namespace(utils.DefaultTestNamespace). - Replicas(replicas). - Build() - - _, err := f.K8sResourceManagers. - DeploymentManager(). - CreateAndWaitTillDeploymentIsReady(deploymentSpec, utils.DefaultDeploymentReadyTimeout*5) - Expect(err).ToNot(HaveOccurred()) - - if minPods != 0 { - time.Sleep(sleep) - } - - for i := 0; i < iterations; i++ { - By("Loop " + strconv.Itoa(i)) - replicas = checkInRange(replicas + iterPods) - fmt.Fprintf(GinkgoWriter, "Scaling cluster up to %v pods\n", replicas) - quickScale(replicas) - Expect(replicas).To(Equal(busyboxPodCnt())) - - replicas = checkInRange(replicas - iterPods) - fmt.Fprintf(GinkgoWriter, "Scaling cluster down to %v pods\n", replicas) - quickScale(replicas) - Expect(replicas).To(Equal(busyboxPodCnt())) - } - - Expect(minPods).To(Equal(busyboxPodCnt())) - - end := time.Now().Unix() - - fmt.Fprintf(GinkgoWriter, fmt.Sprintf("Start Time: %v\n", start)) - fmt.Fprintf(GinkgoWriter, fmt.Sprintf("End Time: %v\n", end)) - - By("Starting Curl Container") - curlContainer := manifest.NewCurlContainer(). - Command([]string{"sleep", "3600"}).Build() - - getCurlPod := manifest.NewDefaultPodBuilder(). - Name("curl-pod"). - Namespace(utils.DefaultTestNamespace). - NodeName(primaryNode.Name). - HostNetwork(true). - Container(curlContainer). - Build() - - testPod, err := f.K8sResourceManagers.PodManager(). - CreateAndWaitTillPodCompleted(getCurlPod) - - logs, errLogs := f.K8sResourceManagers.PodManager(). - PodLogs(testPod.Namespace, testPod.Name) - Expect(errLogs).ToNot(HaveOccurred()) - fmt.Fprintln(GinkgoWriter, logs) - - By("Fetching metrics via Curl Container") - getMetrics(start, end) - - By("Deleting the deployment") - err = f.K8sResourceManagers.DeploymentManager().DeleteAndWaitTillDeploymentIsDeleted(deploymentSpec) - Expect(err).NotTo(HaveOccurred()) - - By("Deleting Curl Container") - err = f.K8sResourceManagers.PodManager().DeleteAndWaitTillPodDeleted(getCurlPod) - Expect(err).NotTo(HaveOccurred()) - }) - - AfterEach(func() { - By("Getting Warm Pool Environment Variables After Test") - getWarmPoolEnvVars() - }) - }) -}) diff --git a/test/integration/warm-pool/use_case_3_test.go b/test/integration/warm-pool/use_case_3_test.go deleted file mode 100644 index 7a365c7ddcf..00000000000 --- a/test/integration/warm-pool/use_case_3_test.go +++ /dev/null @@ -1,112 +0,0 @@ -package warm_pool - -import ( - "fmt" - "github.com/aws/amazon-vpc-cni-k8s/test/framework/resources/k8s/manifest" - "github.com/aws/amazon-vpc-cni-k8s/test/framework/utils" - "math/rand" - "strconv" - "time" - - . "github.com/onsi/ginkgo/v2" - . "github.com/onsi/gomega" -) - -// This test adds a random amount of pods and then subtracts a random amount of pods, limited to the number of preset -// iterations. The number of pods should not drop below minPods. -var _ = Describe("use case 3", func() { - Context("Random Scale Fixed Add and Subtract", func() { - - BeforeEach(func() { - By("Getting Warm Pool Environment Variables Before Test") - getWarmPoolEnvVars() - }) - - It("Scales the cluster and checks warm pool before and after", func() { - replicas := minPods - - start := time.Now().Unix() - - fmt.Fprintf(GinkgoWriter, "Deploying %v minimum pods\n", minPods) - deploymentSpec := manifest.NewBusyBoxDeploymentBuilder(f.Options.TestImageRegistry). - Namespace("default"). - Name("busybox"). - NodeName(primaryNode.Name). - Namespace(utils.DefaultTestNamespace). - Replicas(replicas). - Build() - - _, err := f.K8sResourceManagers. - DeploymentManager(). - CreateAndWaitTillDeploymentIsReady(deploymentSpec, utils.DefaultDeploymentReadyTimeout*5) - Expect(err).ToNot(HaveOccurred()) - - if minPods != 0 { - time.Sleep(sleep) - } - - for i := 0; i < iterations; i++ { - By("Loop " + strconv.Itoa(i)) - randPods := incIf(rand.Intn(randDigits)) - // Will scale to a maximum of maxPods - replicas = min(replicas+randPods, maxPods) - fmt.Fprintf(GinkgoWriter, "Scaling cluster up to %v pods by adding %v pods\n", replicas, randPods) - quickScale(replicas) - Expect(replicas).To(Equal(busyboxPodCnt())) - - randPods = incIf(rand.Intn(randDigits)) - // Will scale to a minimum of minPods pods - replicas = max(replicas-randPods, minPods) - fmt.Fprintf(GinkgoWriter, "Scaling cluster down to %v pods by subtracting %v pods\n", replicas, - randPods) - quickScale(replicas) - Expect(replicas).To(Equal(busyboxPodCnt())) - - if replicas == maxPods { - break - } - } - - end := time.Now().Unix() - - fmt.Fprintf(GinkgoWriter, fmt.Sprintf("Start Time: %v\n", start)) - fmt.Fprintf(GinkgoWriter, fmt.Sprintf("End Time: %v\n", end)) - - By("Starting Curl Container") - curlContainer := manifest.NewCurlContainer(). - Command([]string{"sleep", "3600"}).Build() - - getCurlPod := manifest.NewDefaultPodBuilder(). - Name("curl-pod"). - Namespace(utils.DefaultTestNamespace). - NodeName(primaryNode.Name). - HostNetwork(true). - Container(curlContainer). - Build() - - testPod, err := f.K8sResourceManagers.PodManager(). - CreateAndWaitTillPodCompleted(getCurlPod) - - logs, errLogs := f.K8sResourceManagers.PodManager(). - PodLogs(testPod.Namespace, testPod.Name) - Expect(errLogs).ToNot(HaveOccurred()) - fmt.Fprintln(GinkgoWriter, logs) - - By("Fetching metrics via Curl Container") - getMetrics(start, end) - - By("Deleting the deployment") - err = f.K8sResourceManagers.DeploymentManager().DeleteAndWaitTillDeploymentIsDeleted(deploymentSpec) - Expect(err).NotTo(HaveOccurred()) - - By("Deleting Curl Container") - err = f.K8sResourceManagers.PodManager().DeleteAndWaitTillPodDeleted(getCurlPod) - Expect(err).NotTo(HaveOccurred()) - }) - - AfterEach(func() { - By("Getting Warm Pool Environment Variables After Test") - getWarmPoolEnvVars() - }) - }) -}) diff --git a/test/integration/warm-pool/use_case_4_test.go b/test/integration/warm-pool/use_case_4_test.go deleted file mode 100644 index 7a8487b4ffe..00000000000 --- a/test/integration/warm-pool/use_case_4_test.go +++ /dev/null @@ -1,102 +0,0 @@ -package warm_pool - -import ( - "fmt" - "github.com/aws/amazon-vpc-cni-k8s/test/framework/resources/k8s/manifest" - "github.com/aws/amazon-vpc-cni-k8s/test/framework/utils" - "strconv" - "time" - - . "github.com/onsi/ginkgo/v2" - . "github.com/onsi/gomega" -) - -// This test does a random operation with a random number of pods over a preset number of iterations. -var _ = Describe("use case 4", func() { - Context("Random Scale Random Add and Subtract Operations", func() { - - BeforeEach(func() { - By("Getting Warm Pool Environment Variables Before Test") - getWarmPoolEnvVars() - }) - - It("Scales the cluster and checks warm pool before and after", func() { - replicas := minPods - - fmt.Fprintf(GinkgoWriter, "Deploying %v minimum pods\n", minPods) - - start := time.Now().Unix() - - deploymentSpec := manifest.NewBusyBoxDeploymentBuilder(f.Options.TestImageRegistry). - Namespace("default"). - Name("busybox"). - NodeName(primaryNode.Name). - Namespace(utils.DefaultTestNamespace). - Replicas(replicas). - Build() - - _, err := f.K8sResourceManagers. - DeploymentManager(). - CreateAndWaitTillDeploymentIsReady(deploymentSpec, utils.DefaultDeploymentReadyTimeout*5) - Expect(err).ToNot(HaveOccurred()) - - if minPods != 0 { - time.Sleep(sleep) - } - - for i := 0; i < iterations; i++ { - By("Loop " + strconv.Itoa(i)) - result, op, randPods := randOpLoop(replicas) - replicas = checkInRange(result) - if op == "no change" { - fmt.Fprintf(GinkgoWriter, "No change to cluster, %v pods", replicas) - } else { - fmt.Fprintf(GinkgoWriter, "Scaling cluster to %v pods by %v %v pods\n", replicas, op, randPods) - } - quickScale(replicas) - Expect(replicas).To(Equal(busyboxPodCnt())) - } - - end := time.Now().Unix() - - fmt.Fprintf(GinkgoWriter, fmt.Sprintf("Start Time: %v\n", start)) - fmt.Fprintf(GinkgoWriter, fmt.Sprintf("End Time: %v\n", end)) - - By("Starting Curl Container") - curlContainer := manifest.NewCurlContainer(). - Command([]string{"sleep", "3600"}).Build() - - getCurlPod := manifest.NewDefaultPodBuilder(). - Name("curl-pod"). - Namespace(utils.DefaultTestNamespace). - NodeName(primaryNode.Name). - HostNetwork(true). - Container(curlContainer). - Build() - - testPod, err := f.K8sResourceManagers.PodManager(). - CreateAndWaitTillPodCompleted(getCurlPod) - - logs, errLogs := f.K8sResourceManagers.PodManager(). - PodLogs(testPod.Namespace, testPod.Name) - Expect(errLogs).ToNot(HaveOccurred()) - fmt.Fprintln(GinkgoWriter, logs) - - By("Fetching metrics via Curl Container") - getMetrics(start, end) - - By("Deleting the deployment") - err = f.K8sResourceManagers.DeploymentManager().DeleteAndWaitTillDeploymentIsDeleted(deploymentSpec) - Expect(err).NotTo(HaveOccurred()) - - By("Deleting Curl Container") - err = f.K8sResourceManagers.PodManager().DeleteAndWaitTillPodDeleted(getCurlPod) - Expect(err).NotTo(HaveOccurred()) - }) - - AfterEach(func() { - By("Getting Warm Pool Environment Variables After Test") - getWarmPoolEnvVars() - }) - }) -}) diff --git a/test/integration/warm-pool/use_case_5_test.go b/test/integration/warm-pool/use_case_5_test.go deleted file mode 100644 index 4b9dbf0c74e..00000000000 --- a/test/integration/warm-pool/use_case_5_test.go +++ /dev/null @@ -1,113 +0,0 @@ -package warm_pool - -import ( - "fmt" - "github.com/aws/amazon-vpc-cni-k8s/test/framework/resources/k8s/manifest" - "github.com/aws/amazon-vpc-cni-k8s/test/framework/utils" - "strconv" - "time" - - . "github.com/onsi/ginkgo/v2" - . "github.com/onsi/gomega" -) - -// This test scales the cluster up to maxPods by a preset scale and back down again by that same scale -var _ = Describe("use case 5", func() { - Context("Proportionate Scaling", func() { - - BeforeEach(func() { - By("Getting Warm Pool Environment Variables Before Test") - getWarmPoolEnvVars() - }) - - It("Scales the cluster and checks warm pool before and after", func() { - replicas := minPods - - start := time.Now().Unix() - - fmt.Fprintf(GinkgoWriter, "Deploying %v minimum pods\n", minPods) - deploymentSpec := manifest.NewBusyBoxDeploymentBuilder(f.Options.TestImageRegistry). - Namespace("default"). - Name("busybox"). - NodeName(primaryNode.Name). - Namespace(utils.DefaultTestNamespace). - Replicas(replicas). - Build() - - _, err := f.K8sResourceManagers. - DeploymentManager(). - CreateAndWaitTillDeploymentIsReady(deploymentSpec, utils.DefaultDeploymentReadyTimeout*5) - Expect(err).ToNot(HaveOccurred()) - - if minPods != 0 { - time.Sleep(sleep) - } - - scaleAmt := maxPods * scale - - for replicas < maxPods { - i := 0 - By("Loop " + strconv.Itoa(i)) - // Will scale to a maximum of maxPods - replicas = min(replicas+int(scaleAmt), maxPods) - fmt.Fprintf(GinkgoWriter, "Scaling cluster up to %v pods by adding %v pods\n", replicas, int(scaleAmt)) - quickScale(replicas) - Expect(replicas).To(Equal(busyboxPodCnt())) - i++ - } - - for replicas > minPods { - i := 0 - By("Loop " + strconv.Itoa(i)) - // Will scale to a minimum of minPods - replicas = max(replicas-int(scaleAmt), minPods) - fmt.Fprintf(GinkgoWriter, "Scaling cluster down to %v pods by subtracting %v pods\n", replicas, - int(scaleAmt)) - quickScale(replicas) - Expect(replicas).To(Equal(busyboxPodCnt())) - i++ - } - - end := time.Now().Unix() - - fmt.Fprintf(GinkgoWriter, fmt.Sprintf("Start Time: %v\n", start)) - fmt.Fprintf(GinkgoWriter, fmt.Sprintf("End Time: %v\n", end)) - - By("Starting Curl Container") - curlContainer := manifest.NewCurlContainer(). - Command([]string{"sleep", "3600"}).Build() - - getCurlPod := manifest.NewDefaultPodBuilder(). - Name("curl-pod"). - Namespace(utils.DefaultTestNamespace). - NodeName(primaryNode.Name). - HostNetwork(true). - Container(curlContainer). - Build() - - testPod, err := f.K8sResourceManagers.PodManager(). - CreateAndWaitTillPodCompleted(getCurlPod) - - logs, errLogs := f.K8sResourceManagers.PodManager(). - PodLogs(testPod.Namespace, testPod.Name) - Expect(errLogs).ToNot(HaveOccurred()) - fmt.Fprintln(GinkgoWriter, logs) - - By("Fetching metrics via Curl Container") - getMetrics(start, end) - - By("Deleting the deployment") - err = f.K8sResourceManagers.DeploymentManager().DeleteAndWaitTillDeploymentIsDeleted(deploymentSpec) - Expect(err).NotTo(HaveOccurred()) - - By("Deleting Curl Container") - err = f.K8sResourceManagers.PodManager().DeleteAndWaitTillPodDeleted(getCurlPod) - Expect(err).NotTo(HaveOccurred()) - }) - - AfterEach(func() { - By("Getting Warm Pool Environment Variables After Test") - getWarmPoolEnvVars() - }) - }) -}) diff --git a/test/integration/warm-pool/use_case_6_test.go b/test/integration/warm-pool/use_case_6_test.go deleted file mode 100644 index 84c85b4392e..00000000000 --- a/test/integration/warm-pool/use_case_6_test.go +++ /dev/null @@ -1,117 +0,0 @@ -package warm_pool - -import ( - "fmt" - "github.com/aws/amazon-vpc-cni-k8s/test/framework/resources/k8s/manifest" - "github.com/aws/amazon-vpc-cni-k8s/test/framework/utils" - "math/rand" - "strconv" - "time" - - . "github.com/onsi/ginkgo/v2" - . "github.com/onsi/gomega" -) - -// This test will scale the cluster up to maxPods with a scale amount that is randomly calculated each loop based on -// maxPods and scales the cluster back down with the same way -var _ = Describe("use case 6", func() { - Context("Random Scaling", func() { - - BeforeEach(func() { - By("Getting Warm Pool Environment Variables Before Test") - getWarmPoolEnvVars() - }) - - It("Scales the cluster and checks warm pool before and after", func() { - replicas := minPods - - start := time.Now().Unix() - - fmt.Fprintf(GinkgoWriter, "Deploying %v minimum pods\n", minPods) - deploymentSpec := manifest.NewBusyBoxDeploymentBuilder(f.Options.TestImageRegistry). - Namespace("default"). - Name("busybox"). - NodeName(primaryNode.Name). - Namespace(utils.DefaultTestNamespace). - Replicas(replicas). - Build() - - _, err := f.K8sResourceManagers. - DeploymentManager(). - CreateAndWaitTillDeploymentIsReady(deploymentSpec, utils.DefaultDeploymentReadyTimeout*5) - Expect(err).ToNot(HaveOccurred()) - - if minPods != 0 { - time.Sleep(sleep) - } - - for replicas < maxPods { - i := 0 - randScale := rand.Intn(randDigits) + 1 // prevent divide by 0 - scaleAmt := int(maxPods / randScale) - By("Loop " + strconv.Itoa(i)) - // Will scale to a maximum of maxPods - replicas = min(replicas+scaleAmt, maxPods) - fmt.Fprintf(GinkgoWriter, "Scaling cluster up to %v pods by adding %v pods\n", replicas, scaleAmt) - quickScale(replicas) - Expect(replicas).To(Equal(busyboxPodCnt())) - i++ - } - - for replicas > minPods { - i := 0 - randScale := rand.Intn(randDigits) - scaleAmt := int(maxPods/randScale) + 1 // prevent divide by 0 - By("Loop " + strconv.Itoa(i)) - // Will scale to a minimum of minPods - replicas = max(replicas-scaleAmt, minPods) - fmt.Fprintf(GinkgoWriter, "Scaling cluster down to %v pods by subtracting %v pods\n", replicas, - scaleAmt) - quickScale(replicas) - Expect(replicas).To(Equal(busyboxPodCnt())) - i++ - } - - end := time.Now().Unix() - - fmt.Fprintf(GinkgoWriter, fmt.Sprintf("Start Time: %v\n", start)) - fmt.Fprintf(GinkgoWriter, fmt.Sprintf("End Time: %v\n", end)) - - By("Starting Curl Container") - curlContainer := manifest.NewCurlContainer(). - Command([]string{"sleep", "3600"}).Build() - - getCurlPod := manifest.NewDefaultPodBuilder(). - Name("curl-pod"). - Namespace(utils.DefaultTestNamespace). - NodeName(primaryNode.Name). - HostNetwork(true). - Container(curlContainer). - Build() - - testPod, err := f.K8sResourceManagers.PodManager(). - CreateAndWaitTillPodCompleted(getCurlPod) - - logs, errLogs := f.K8sResourceManagers.PodManager(). - PodLogs(testPod.Namespace, testPod.Name) - Expect(errLogs).ToNot(HaveOccurred()) - fmt.Fprintln(GinkgoWriter, logs) - - By("Fetching metrics via Curl Container") - getMetrics(start, end) - - By("Deleting the deployment") - err = f.K8sResourceManagers.DeploymentManager().DeleteAndWaitTillDeploymentIsDeleted(deploymentSpec) - Expect(err).NotTo(HaveOccurred()) - - By("Deleting Curl Container") - err = f.K8sResourceManagers.PodManager().DeleteAndWaitTillPodDeleted(getCurlPod) - Expect(err).NotTo(HaveOccurred()) - }) - - AfterEach(func() { - By("Getting Warm Pool Environment Variables After Test") - getWarmPoolEnvVars() - }) - }) -}) diff --git a/test/integration/warm-pool/use_case_7_test.go b/test/integration/warm-pool/use_case_7_test.go deleted file mode 100644 index bc8b43ce44c..00000000000 --- a/test/integration/warm-pool/use_case_7_test.go +++ /dev/null @@ -1,114 +0,0 @@ -package warm_pool - -import ( - "fmt" - "github.com/aws/amazon-vpc-cni-k8s/test/framework/resources/k8s/manifest" - "github.com/aws/amazon-vpc-cni-k8s/test/framework/utils" - "math/rand" - "strconv" - "time" - - . "github.com/onsi/ginkgo/v2" - . "github.com/onsi/gomega" -) - -// This test will create a single burst to maxPods at a random interval depending on the preset number of iterations -var _ = Describe("use case 7", func() { - Context("Single Burst Behavior", func() { - - BeforeEach(func() { - By("Getting Warm Pool Environment Variables Before Test") - getWarmPoolEnvVars() - }) - - It("Scales the cluster and checks warm pool before and after", func() { - replicas := minPods - - start := time.Now().Unix() - - fmt.Fprintf(GinkgoWriter, "Deploying %v minimum pods\n", minPods) - deploymentSpec := manifest.NewBusyBoxDeploymentBuilder(f.Options.TestImageRegistry). - Namespace("default"). - Name("busybox"). - NodeName(primaryNode.Name). - Namespace(utils.DefaultTestNamespace). - Replicas(replicas). - Build() - - _, err := f.K8sResourceManagers. - DeploymentManager(). - CreateAndWaitTillDeploymentIsReady(deploymentSpec, utils.DefaultDeploymentReadyTimeout*5) - Expect(err).ToNot(HaveOccurred()) - - if minPods != 0 { - time.Sleep(sleep) - } - - randBurst := rand.Intn(iterations) - - for i := 0; i < iterations; i++ { - By("Loop " + strconv.Itoa(i)) - - if i == randBurst { - fmt.Fprintf(GinkgoWriter, "Burst behavior from %v to %v pods\n", replicas, maxPods) - quickScale(maxPods) - continue - } - - if i == randBurst+1 { - fmt.Fprintf(GinkgoWriter, "Burst behavior over, scaling down from %v to %v pods\n", maxPods, - replicas) - quickScale(replicas) - continue - } - - result, op := randOp(replicas, iterPods) - replicas = checkInRange(result) - fmt.Fprintf(GinkgoWriter, "%v %v pod to cluster to equal %v pods\n", op, iterPods, replicas) - quickScale(replicas) - Expect(replicas).To(Equal(busyboxPodCnt())) - } - - end := time.Now().Unix() - - fmt.Fprintf(GinkgoWriter, fmt.Sprintf("Start Time: %v\n", start)) - fmt.Fprintf(GinkgoWriter, fmt.Sprintf("End Time: %v\n", end)) - - By("Starting Curl Container") - curlContainer := manifest.NewCurlContainer(). - Command([]string{"sleep", "3600"}).Build() - - getCurlPod := manifest.NewDefaultPodBuilder(). - Name("curl-pod"). - Namespace(utils.DefaultTestNamespace). - NodeName(primaryNode.Name). - HostNetwork(true). - Container(curlContainer). - Build() - - testPod, err := f.K8sResourceManagers.PodManager(). - CreateAndWaitTillPodCompleted(getCurlPod) - - logs, errLogs := f.K8sResourceManagers.PodManager(). - PodLogs(testPod.Namespace, testPod.Name) - Expect(errLogs).ToNot(HaveOccurred()) - fmt.Fprintln(GinkgoWriter, logs) - - By("Fetching metrics via Curl Container") - getMetrics(start, end) - - By("Deleting the deployment") - err = f.K8sResourceManagers.DeploymentManager().DeleteAndWaitTillDeploymentIsDeleted(deploymentSpec) - Expect(err).NotTo(HaveOccurred()) - - By("Deleting Curl Container") - err = f.K8sResourceManagers.PodManager().DeleteAndWaitTillPodDeleted(getCurlPod) - Expect(err).NotTo(HaveOccurred()) - }) - - AfterEach(func() { - By("Getting Warm Pool Environment Variables After Test") - getWarmPoolEnvVars() - }) - }) -}) diff --git a/test/integration/warm-pool/use_case_8_test.go b/test/integration/warm-pool/use_case_8_test.go deleted file mode 100644 index 9ec6f10f2f6..00000000000 --- a/test/integration/warm-pool/use_case_8_test.go +++ /dev/null @@ -1,122 +0,0 @@ -package warm_pool - -import ( - "fmt" - "github.com/aws/amazon-vpc-cni-k8s/test/framework/resources/k8s/manifest" - "github.com/aws/amazon-vpc-cni-k8s/test/framework/utils" - "math/rand" - "strconv" - "time" - - . "github.com/onsi/ginkgo/v2" - . "github.com/onsi/gomega" -) - -// This test will simulate a preset number of bursts of different sizes relative to maxPods that occur at random -// intervals over a preset number of iterations -var _ = Describe("use case 8", func() { - Context("Multiple Burst Behavior", func() { - - BeforeEach(func() { - By("Getting Warm Pool Environment Variables Before Test") - getWarmPoolEnvVars() - }) - - It("Scales the cluster and checks warm pool before and after", func() { - replicas := minPods - - start := time.Now().Unix() - - fmt.Fprintf(GinkgoWriter, "Deploying %v minimum pods\n", minPods) - deploymentSpec := manifest.NewBusyBoxDeploymentBuilder(f.Options.TestImageRegistry). - Namespace("default"). - Name("busybox"). - NodeName(primaryNode.Name). - Namespace(utils.DefaultTestNamespace). - Replicas(replicas). - Build() - - _, err := f.K8sResourceManagers. - DeploymentManager(). - CreateAndWaitTillDeploymentIsReady(deploymentSpec, utils.DefaultDeploymentReadyTimeout*5) - Expect(err).ToNot(HaveOccurred()) - - if minPods != 0 { - time.Sleep(sleep) - } - - // Creates some bursts of different sizes at random iterations. - burstIdx := rand.Perm(iterations)[:numBursts] - burstMap := make(map[int]int) - for i := 0; i < len(burstIdx); i++ { - key := burstIdx[i] - //value := incIf(rand.Intn(maxPods + 1)) - //value := int(maxPods / (rand.Intn(4) + 1)) - value := int(maxPods) - burstMap[key] = value - } - - for i := 0; i < iterations; i++ { - By("Loop " + strconv.Itoa(i)) - - val, present := burstMap[i] - if present { - fmt.Fprintf(GinkgoWriter, "Burst behavior from %v to %v pods\n", replicas, val) - quickScale(val) - - fmt.Fprintf(GinkgoWriter, "Burst behavior over, scaling down from %v to %v pods\n", val, - replicas) - quickScale(replicas) - continue - } - - result, op := randOp(replicas, iterPods) - replicas = checkInRange(result) - fmt.Fprintf(GinkgoWriter, "%v %v pod from cluster to equal %v pods\n", op, iterPods, replicas) - quickScale(replicas) - Expect(replicas).To(Equal(busyboxPodCnt())) - } - - end := time.Now().Unix() - - fmt.Fprintf(GinkgoWriter, fmt.Sprintf("Start Time: %v\n", start)) - fmt.Fprintf(GinkgoWriter, fmt.Sprintf("End Time: %v\n", end)) - - By("Starting Curl Container") - curlContainer := manifest.NewCurlContainer(). - Command([]string{"sleep", "3600"}).Build() - - getCurlPod := manifest.NewDefaultPodBuilder(). - Name("curl-pod"). - Namespace(utils.DefaultTestNamespace). - NodeName(primaryNode.Name). - HostNetwork(true). - Container(curlContainer). - Build() - - testPod, err := f.K8sResourceManagers.PodManager(). - CreateAndWaitTillPodCompleted(getCurlPod) - - logs, errLogs := f.K8sResourceManagers.PodManager(). - PodLogs(testPod.Namespace, testPod.Name) - Expect(errLogs).ToNot(HaveOccurred()) - fmt.Fprintln(GinkgoWriter, logs) - - By("Fetching metrics via Curl Container") - getMetrics(start, end) - - By("Deleting the deployment") - err = f.K8sResourceManagers.DeploymentManager().DeleteAndWaitTillDeploymentIsDeleted(deploymentSpec) - Expect(err).NotTo(HaveOccurred()) - - By("Deleting Curl Container") - err = f.K8sResourceManagers.PodManager().DeleteAndWaitTillPodDeleted(getCurlPod) - Expect(err).NotTo(HaveOccurred()) - }) - - AfterEach(func() { - By("Getting Warm Pool Environment Variables After Test") - getWarmPoolEnvVars() - }) - }) -}) diff --git a/test/integration/warm-pool/use_case_9_test.go b/test/integration/warm-pool/use_case_9_test.go deleted file mode 100644 index ce372e1c105..00000000000 --- a/test/integration/warm-pool/use_case_9_test.go +++ /dev/null @@ -1,115 +0,0 @@ -package warm_pool - -import ( - "fmt" - "github.com/aws/amazon-vpc-cni-k8s/test/framework/resources/k8s/manifest" - "github.com/aws/amazon-vpc-cni-k8s/test/framework/utils" - "math/rand" - "strconv" - "time" - - . "github.com/onsi/ginkgo/v2" - . "github.com/onsi/gomega" -) - -// This test will add a random amount of pods until it gets to maxPods, then subtract a random amount of pods until it -// gets to minPods -var _ = Describe("use case 9", func() { - Context("Random Add to Max, Random Sub to Min", func() { - - BeforeEach(func() { - By("Getting Warm Pool Environment Variables Before Test") - getWarmPoolEnvVars() - }) - - It("Scales the cluster and checks warm pool before and after", func() { - replicas := minPods - - start := time.Now().Unix() - - fmt.Fprintf(GinkgoWriter, "Deploying %v minimum pods\n", minPods) - deploymentSpec := manifest.NewBusyBoxDeploymentBuilder(f.Options.TestImageRegistry). - Namespace("default"). - Name("busybox"). - NodeName(primaryNode.Name). - Namespace(utils.DefaultTestNamespace). - Replicas(replicas). - Build() - - _, err := f.K8sResourceManagers. - DeploymentManager(). - CreateAndWaitTillDeploymentIsReady(deploymentSpec, utils.DefaultDeploymentReadyTimeout*5) - Expect(err).ToNot(HaveOccurred()) - - if minPods != 0 { - time.Sleep(sleep) - } - - for replicas < maxPods { - i := 0 - randPods := incIf(rand.Intn(randDigits)) - By("Loop " + strconv.Itoa(i)) - // Will scale to a maximum of maxPods - replicas = min(replicas+randPods, maxPods) - fmt.Fprintf(GinkgoWriter, "Scaling cluster up to %v pods by adding %v pods\n", replicas, randPods) - quickScale(replicas) - Expect(replicas).To(Equal(busyboxPodCnt())) - i++ - } - - for replicas > minPods { - i := 0 - randPods := incIf(rand.Intn(randDigits)) - By("Loop " + strconv.Itoa(i)) - // Will scale to a minimum of minPods - replicas = max(replicas-randPods, minPods) - fmt.Fprintf(GinkgoWriter, "Scaling cluster down to %v pods by subtracting %v pods\n", replicas, - randPods) - quickScale(replicas) - Expect(replicas).To(Equal(busyboxPodCnt())) - i++ - } - - end := time.Now().Unix() - - fmt.Fprintf(GinkgoWriter, fmt.Sprintf("Start Time: %v\n", start)) - fmt.Fprintf(GinkgoWriter, fmt.Sprintf("End Time: %v\n", end)) - - By("Starting Curl Container") - curlContainer := manifest.NewCurlContainer(). - Command([]string{"sleep", "3600"}).Build() - - getCurlPod := manifest.NewDefaultPodBuilder(). - Name("curl-pod"). - Namespace(utils.DefaultTestNamespace). - NodeName(primaryNode.Name). - HostNetwork(true). - Container(curlContainer). - Build() - - testPod, err := f.K8sResourceManagers.PodManager(). - CreateAndWaitTillPodCompleted(getCurlPod) - - logs, errLogs := f.K8sResourceManagers.PodManager(). - PodLogs(testPod.Namespace, testPod.Name) - Expect(errLogs).ToNot(HaveOccurred()) - fmt.Fprintln(GinkgoWriter, logs) - - By("Fetching metrics via Curl Container") - getMetrics(start, end) - - By("Deleting the deployment") - err = f.K8sResourceManagers.DeploymentManager().DeleteAndWaitTillDeploymentIsDeleted(deploymentSpec) - Expect(err).NotTo(HaveOccurred()) - - By("Deleting Curl Container") - err = f.K8sResourceManagers.PodManager().DeleteAndWaitTillPodDeleted(getCurlPod) - Expect(err).NotTo(HaveOccurred()) - }) - - AfterEach(func() { - By("Getting Warm Pool Environment Variables After Test") - getWarmPoolEnvVars() - }) - }) -}) diff --git a/test/integration/warm-pool/warm_pool_suite_test.go b/test/integration/warm-pool/warm_pool_suite_test.go deleted file mode 100644 index 0f8d6dadfa4..00000000000 --- a/test/integration/warm-pool/warm_pool_suite_test.go +++ /dev/null @@ -1,369 +0,0 @@ -// VPC Warm Pool Test Suite -// This test suite is a foundation for evaluating a dynamic warm pool, or ip consumption in general. Pair with grafana -//metrics dashboard to look at ip allocation and consumption. Each test displays the warm pool environment variables -//before and after to evaluate the changes made to the warm pool. Environment variables are not reset before and after -//each test so that way multiple tests can be run to evaluate behavior. You can run the test "clear warm env" which will -//unset all warm pool environment variables. Or, if you want to test the behavior with some of those environment -//variables set, alter them in that test and run it once before you run the desired tests. -// Use Case Test 1: Quick Scale Up and Down -// Use Case Test 2: Sawtooth Fixed Add and Subtract -// Use Case Test 3: Random Scale Fixed Add and Subtract -// Use Case Test 4: Random Scale Random Add and Subtract Operations -// Use Case Test 5: Proportionate Scaling -// Use Case Test 6: Random Scaling -// Use Case Test 7: Single Burst Behavior -// Use Case Test 8: Multiple Burst Behavior -// Use Case Test 9: Random Add to Max, Random Sub to Min - -package warm_pool - -import ( - "encoding/json" - "fmt" - "github.com/aws/amazon-vpc-cni-k8s/test/framework/resources/k8s/manifest" - "github.com/aws/aws-sdk-go/service/ec2" - . "github.com/onsi/ginkgo/v2" - . "github.com/onsi/gomega" - v1 "k8s.io/api/apps/v1" - corev1 "k8s.io/api/core/v1" - "math/rand" - "slices" - "strconv" - "testing" - "time" - - "github.com/aws/amazon-vpc-cni-k8s/test/framework" - k8sUtils "github.com/aws/amazon-vpc-cni-k8s/test/framework/resources/k8s/utils" - "github.com/aws/amazon-vpc-cni-k8s/test/framework/utils" -) - -// Warm Pool Test Suite Constants -// Run all tests with these constants or change individual tests to get desired outcome -// Environment variables are used in the tests listed in the (...) -const ( - randDigits = 10 // exclusive, used in rand.Intn to change scale amount, <= maxPods, (3,6,9) - scale = 0.25 // used in set proportional scaling, iterate with a fixed percentage (5) - iterations = 2 // run test over a set number of iterations (2,3,4,7,8) - iterPods = 1 // iterate with a fixed number of pods (2,7,8) - numBursts = 2 // Use Case Test 8, set number of bursts (8) - preventNoChange = 1 // retries x amount of times if randInt/randOp is out of range, if out of range no cluster - // scaling occurs, if set above 0 will increment some areas of no cluster scaling (3, 4, 6, 8, 9) - maxPods = 60 // max pods you want to work with for your cluster (all) - minPods = 0 // tests can be run with a base amount of pods at start (all) - sleep = 1 * time.Minute // sleep interval (all) -) - -var clusterIP = "10.100.140.129" // Get the cluster ip of the prometheus-server service -var primaryInstance *ec2.Instance -var f *framework.Framework -var err error -var coreDNSDeploymentCopy *v1.Deployment - -const CoreDNSDeploymentName = "coredns" -const KubeSystemNamespace = "kube-system" - -type Result struct { - Status string `json:"status"` - Data struct { - ResultType string `json:"resultType"` - Result []struct { - Metric struct { - Name string `json:"__name__"` - AppKubernetesIoInstance string `json:"app_kubernetes_io_instance"` - AppKubernetesIoName string `json:"app_kubernetes_io_name"` - ControllerRevisionHash string `json:"controller_revision_hash"` - Instance string `json:"instance"` - Job string `json:"job"` - K8SApp string `json:"k8s_app"` - Namespace string `json:"namespace"` - Node string `json:"node"` - Pod string `json:"pod"` - PodTemplateGeneration string `json:"pod_template_generation"` - } - Values [][2]interface{} `json:"values"` - } - } -} - -func TestWarmPool(t *testing.T) { - RegisterFailHandler(Fail) - RunSpecs(t, "VPC Warm Pool Test Suite") -} - -var _ = BeforeSuite(func() { - f = framework.New(framework.GlobalOptions) - - By("creating test namespace") - f.K8sResourceManagers.NamespaceManager(). - CreateNamespace(utils.DefaultTestNamespace) - - nodeList, err := f.K8sResourceManagers.NodeManager().GetNodes(f.Options.NgNameLabelKey, - f.Options.NgNameLabelVal) - Expect(err).ToNot(HaveOccurred()) - - numOfNodes := len(nodeList.Items) - Expect(numOfNodes).Should(BeNumerically(">", 1)) - - // Nominate the first untainted node as the one to run coredns deployment against - By("adding nodeSelector in coredns deployment to be scheduled on single node") - var primaryNode *corev1.Node - for _, n := range nodeList.Items { - if len(n.Spec.Taints) == 0 { - primaryNode = &n - break - } - } - Expect(primaryNode).To(Not(BeNil()), "expected to find a non-tainted node") - instanceID := k8sUtils.GetInstanceIDFromNode(*primaryNode) - primaryInstance, err = f.CloudServices.EC2().DescribeInstance(instanceID) - Expect(err).ToNot(HaveOccurred()) - - By("getting node with no pods scheduled to run tests") - coreDNSDeployment, err := f.K8sResourceManagers.DeploymentManager().GetDeployment(CoreDNSDeploymentName, - KubeSystemNamespace) - Expect(err).ToNot(HaveOccurred()) - - // Copy the deployment to restore later - coreDNSDeploymentCopy = coreDNSDeployment.DeepCopy() - - // Add nodeSelector label to coredns deployment so coredns pods are scheduled on 'primary' node - coreDNSDeployment.Spec.Template.Spec.NodeSelector = map[string]string{ - "kubernetes.io/hostname": primaryNode.Labels["kubernetes.io/hostname"], - } - err = f.K8sResourceManagers.DeploymentManager().UpdateAndWaitTillDeploymentIsReady(coreDNSDeployment, - utils.DefaultDeploymentReadyTimeout) - Expect(err).ToNot(HaveOccurred()) - - // Redefine primary node as node without coredns pods. Note that this node may have previously had coredns pods. - for _, n := range nodeList.Items { - if len(n.Spec.Taints) == 0 && n.Name != primaryNode.Name { - primaryNode = &n - break - } - } - fmt.Fprintf(GinkgoWriter, "primary node is %s\n", primaryNode.Name) - instanceID = k8sUtils.GetInstanceIDFromNode(*primaryNode) - primaryInstance, err = f.CloudServices.EC2().DescribeInstance(instanceID) - Expect(err).ToNot(HaveOccurred()) -}) - -var _ = AfterSuite(func() { - // Restore coredns deployment - By("restoring coredns deployment") - err = f.K8sResourceManagers.DeploymentManager().UpdateAndWaitTillDeploymentIsReady(coreDNSDeploymentCopy, - utils.DefaultDeploymentReadyTimeout) - - By("deleting test namespace") - f.K8sResourceManagers.NamespaceManager(). - DeleteAndWaitTillNamespaceDeleted(utils.DefaultTestNamespace) -}) - -// Helper Functions // -func getWarmPoolEnvVars() { - daemonset, _ := f.K8sResourceManagers.DaemonSetManager().GetDaemonSet("kube-system", "aws-node") - warmPoolKeys := [5]string{"WARM_ENI_TARGET", "MINIMUM_IP_TARGET", "WARM_IP_TARGET", "WARM_PREFIX_TARGET", - "ENABLE_DYNAMIC_WARM_POOL"} - print("----\n") - for _, key := range warmPoolKeys { - val := utils.GetEnvValueForKeyFromDaemonSet(key, daemonset) - if val != "" { - print(" -", key, " : ", val, "\n") - } else { - print(" -", key, " : not set", "\n") - } - } - print("----\n") -} - -// Basic Prometheus api call -func callPrometheus(url string) Result { - command := []string{"curl", "--silent", "-g", url} - stdout, _, err := f.K8sResourceManagers.PodManager().PodExec(utils.DefaultTestNamespace, "curl-pod", - command) - Expect(err).ToNot(HaveOccurred()) - Expect(stdout).ShouldNot(BeEmpty()) - var result Result - marshallErr := json.Unmarshal([]byte(stdout), &result) - if marshallErr != nil { - fmt.Printf("Cannot unmarshall json: %s", marshallErr) - } - return result -} - -// Gets Prometheus metrics over the duration of the test and displays them -func getMetrics(start int64, end int64) { - warmMetric := "awscni_total_ip_addresses-awscni_assigned_ip_addresses" - noAddrsMetric := "awscni_err_no_avail_addrs" - netMetric := "awscni_assigned_ip_addresses" - duration := strDurationMin(start, end) - step := "30s" - - // warmMetric - netWarmUrl := fmt.Sprintf("http://%s/api/v1/query_range?query=%s&start=%v&end=%v&step=%s", - clusterIP, warmMetric, start, end, step) - resultNetWarm := callPrometheus(netWarmUrl) - fmt.Printf("\n %s", warmMetric) - netMap := make(map[string]int) - fmt.Printf("\nMAX Warm Pool (%v) over test duration: \n", warmMetric) - for i := 0; i < len(resultNetWarm.Data.Result); i++ { - node := resultNetWarm.Data.Result[i].Metric.Node - var maxArr []int - for j := 0; j < len(resultNetWarm.Data.Result[i].Values); j++ { - val, _ := strconv.Atoi(resultNetWarm.Data.Result[i].Values[j][1].(string)) - maxArr = append(maxArr, val) - if j == len(resultNetWarm.Data.Result[i].Values)-1 { - netMap[node] = val - } - } - fmt.Printf("%v : %v \n", node, slices.Max(maxArr)) - } - fmt.Printf("\nNET Warm Pool (%s) over test duration: \n", warmMetric) - for k, v := range netMap { - fmt.Printf("%v : %v \n", k, v) - } - - // noAddrsMetric - fmt.Printf("\n %s", noAddrsMetric) - noAddrUrl := fmt.Sprintf("http://%s/api/v1/query_range?query=delta(%s[%sm])&start=%v&end=%v&step=%s", - clusterIP, noAddrsMetric, duration, start, end, step) - resultNoAddrs := callPrometheus(noAddrUrl) - fmt.Printf("\nMAX DELTA %s over test duration: \n", noAddrsMetric) - for i := 0; i < len(resultNoAddrs.Data.Result); i++ { - node := resultNoAddrs.Data.Result[i].Metric.Node - var maxArr []int - for j := 0; j < len(resultNoAddrs.Data.Result[i].Values); j++ { - val := resultNoAddrs.Data.Result[i].Values[j][1].(string) - floatVal, err := strconv.ParseFloat(val, 64) - if err != nil { - Expect(err).ToNot(HaveOccurred()) - } - maxArr = append(maxArr, int(floatVal)) - } - fmt.Printf("%v : %v \n", node, slices.Max(maxArr)) - } - - // netMetric - fmt.Printf("\n %s", netMetric) - netUrl := fmt.Sprintf("http://%s/api/v1/query_range?query=delta(%s[%sm])&start=%v&end=%v&step=%s", - clusterIP, netMetric, duration, start, end, step) - resultNet := callPrometheus(netUrl) - fmt.Printf("\nMAX DELTA %s over test duration: \n", netMetric) - for i := 0; i < len(resultNet.Data.Result); i++ { - node := resultNet.Data.Result[i].Metric.Node - var maxArr []int - for j := 0; j < len(resultNet.Data.Result[i].Values); j++ { - val := resultNet.Data.Result[i].Values[j][1].(string) - floatVal, err := strconv.ParseFloat(val, 64) - if err != nil { - Expect(err).ToNot(HaveOccurred()) - } - maxArr = append(maxArr, int(floatVal)) - } - fmt.Printf("%v : %v \n", node, slices.Max(maxArr)) - } -} - -// Gets the duration in minutes for Prometheus queries -func strDurationMin(start int64, end int64) string { - duration := (end - start) / 60 - durationMin := strconv.FormatInt(duration, 10) - print("TEST DURATION: ", duration) - return durationMin -} - -// Random operation, if preventNoChange is 0 this includes no change being a result, otherwise it will add or subtract -func randOp(replicas int, pods int) (int, string) { - if preventNoChange == 0 { - op := rand.Intn(3) - if op == 0 { - return replicas + pods, "adding" - } - if op == 1 { - return replicas - pods, "subtracting" - } else { - return replicas, "no change" - } - } else { - op := rand.Intn(2) - if op == 0 { - return replicas + pods, "adding" - } else { - return replicas - pods, "subtracting" - } - } -} - -// Tries to get a random op/number combo that actually changes the cluster. If preventNoChange is above 0, will -// attempt to get another random integer to add/subtract that is within range. This is not always possible depending on -// what iterations and randDigits is set to, so it is best to set preventNoChange to a low number if it is set at all. -// If you want to see periods of no change, set this to 0. -func randOpLoop(replicas int) (int, string, int) { - result := 0 - op := "" - randPods := 0 - for i := 0; i < preventNoChange+1; i++ { - randPods = rand.Intn(randDigits) - result, op = randOp(replicas, randPods) - if result > minPods && result < maxPods && randPods != 0 { - return result, op, randPods - } - } - return result, op, randPods -} - -func quickScale(pods int) { - deploymentSpec := manifest.NewBusyBoxDeploymentBuilder(f.Options.TestImageRegistry). - Namespace("default"). - Name("busybox"). - NodeName(primaryNode.Name). - Namespace(utils.DefaultTestNamespace). - Replicas(pods). - Build() - - err := f.K8sResourceManagers. - DeploymentManager(). - UpdateAndWaitTillDeploymentIsReady(deploymentSpec, utils.DefaultDeploymentReadyTimeout*5) - Expect(err).ToNot(HaveOccurred()) - - time.Sleep(sleep) -} - -// Check on pod count outside deployment -func busyboxPodCnt() int { - podCount := 0 - podList, _ := f.K8sResourceManagers.PodManager().GetPodsWithLabelSelector("role", "test") - for _, _ = range podList.Items { - podCount += 1 - } - return podCount -} - -func checkInRange(result int) int { - replicas := result - replicas = max(replicas, minPods) - replicas = min(replicas, maxPods) - return replicas -} - -// Tries to prevent no scaling in the cluster as rand.Intn is inclusive with 0, so just scale 1 instead. -func incIf(pods int) int { - if pods == 0 && preventNoChange > 0 { - return 1 - } else { - return pods - } -} - -func max(x, y int) int { - if x < y { - return y - } - return x -} - -func min(x, y int) int { - if y < x { - return y - } - return x -}