diff --git a/clusterloader2/pkg/config/codec.go b/clusterloader2/pkg/config/codec.go index f1c03bc63d..0a2c25212c 100644 --- a/clusterloader2/pkg/config/codec.go +++ b/clusterloader2/pkg/config/codec.go @@ -18,7 +18,9 @@ package config import ( "bytes" + "errors" "fmt" + "strings" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/util/yaml" @@ -26,6 +28,12 @@ import ( "k8s.io/perf-tests/clusterloader2/api" ) +var ( + // ErrorEmptyFile indicates that the manifest file was empty. + // Useful to distinguish whether the manifest was empty or malformed. + ErrorEmptyFile = errors.New("emptyfile") +) + // convertToConfig converts array of bytes into test config. func convertToConfig(raw []byte) (*api.Config, error) { var config api.Config @@ -37,6 +45,9 @@ func convertToConfig(raw []byte) (*api.Config, error) { // convertToObject converts array of bytes into unstructured object. func convertToObject(raw []byte) (*unstructured.Unstructured, error) { + if isEmpty(raw) { + return nil, ErrorEmptyFile + } obj := &unstructured.Unstructured{} _, _, err := scheme.Codecs.UniversalDeserializer().Decode(raw, nil, obj) if err != nil { @@ -51,3 +62,7 @@ func decodeInto(raw []byte, v interface{}) error { } return nil } + +func isEmpty(raw []byte) bool { + return strings.TrimSpace(string(raw[:])) == "" +} diff --git a/clusterloader2/pkg/framework/framework.go b/clusterloader2/pkg/framework/framework.go index 38f4666226..a3b3e241e6 100644 --- a/clusterloader2/pkg/framework/framework.go +++ b/clusterloader2/pkg/framework/framework.go @@ -189,23 +189,26 @@ func (f *Framework) ApplyTemplatedManifests(manifestGlob string, templateMapping klog.Infof("Applying %s\n", manifest) obj, err := templateProvider.TemplateToObject(filepath.Base(manifest), templateMapping) if err != nil { + if err == config.ErrorEmptyFile { + klog.Warningf("Skipping empty manifest %s", manifest) + continue + } return err } + objList := []unstructured.Unstructured{*obj} if obj.IsList() { - objList, 
err := obj.ToList() + list, err := obj.ToList() if err != nil { return err } - for _, item := range objList.Items { - if err := f.CreateObject(item.GetNamespace(), item.GetName(), &item, options...); err != nil { - return fmt.Errorf("error while applying (%s): %v", manifest, err) - } - } - } else { - if err := f.CreateObject(obj.GetNamespace(), obj.GetName(), obj, options...); err != nil { + objList = list.Items + } + for _, item := range objList { + if err := f.CreateObject(item.GetNamespace(), item.GetName(), &item, options...); err != nil { return fmt.Errorf("error while applying (%s): %v", manifest, err) } } + } return nil } diff --git a/clusterloader2/pkg/prometheus/experimental.go b/clusterloader2/pkg/prometheus/experimental.go index 545a418c42..3bce59c821 100644 --- a/clusterloader2/pkg/prometheus/experimental.go +++ b/clusterloader2/pkg/prometheus/experimental.go @@ -19,12 +19,12 @@ package prometheus import ( "encoding/json" "fmt" - "github.com/spf13/pflag" - "k8s.io/apimachinery/pkg/util/wait" "os/exec" "time" + "github.com/spf13/pflag" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/wait" "k8s.io/klog" ) diff --git a/clusterloader2/pkg/prometheus/manifests/dashboards/network-programming-latency.json b/clusterloader2/pkg/prometheus/manifests/dashboards/network-programming-latency.json index 422da6c57f..8a01a573ae 100644 --- a/clusterloader2/pkg/prometheus/manifests/dashboards/network-programming-latency.json +++ b/clusterloader2/pkg/prometheus/manifests/dashboards/network-programming-latency.json @@ -67,21 +67,21 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(kubeproxy_network_programming_latency_seconds_bucket[10m])) by (le))", + "expr": "kubeproxy:kubeproxy_network_programming_duration:histogram_quantile{quantile='0.99'}", "format": "time_series", "intervalFactor": 1, "legendFormat": "99pctl", "refId": "A" }, { - "expr": "histogram_quantile(0.95, 
sum(rate(kubeproxy_network_programming_latency_seconds_bucket[10m])) by (le))", + "expr": "kubeproxy:kubeproxy_network_programming_duration:histogram_quantile{quantile='0.90'}", "format": "time_series", "intervalFactor": 1, "legendFormat": "90pctl", "refId": "B" }, { - "expr": "histogram_quantile(0.50, sum(rate(kubeproxy_network_programming_latency_seconds_bucket[10m])) by (le))", + "expr": "kubeproxy:kubeproxy_network_programming_duration:histogram_quantile{quantile='0.50'}", "format": "time_series", "intervalFactor": 1, "legendFormat": "50pctl", @@ -109,10 +109,10 @@ "yaxes": [ { "format": "short", - "label": null, + "label": "seconds", "logBase": 1, "max": null, - "min": null, + "min": "0", "show": true }, { diff --git a/clusterloader2/pkg/prometheus/manifests/default/kube-proxy-service.yaml b/clusterloader2/pkg/prometheus/manifests/default/kube-proxy-service.yaml new file mode 100644 index 0000000000..9eeedcf654 --- /dev/null +++ b/clusterloader2/pkg/prometheus/manifests/default/kube-proxy-service.yaml @@ -0,0 +1,19 @@ +{{$PROMETHEUS_SCRAPE_KUBE_PROXY := DefaultParam .PROMETHEUS_SCRAPE_KUBE_PROXY false}} + +{{if $PROMETHEUS_SCRAPE_KUBE_PROXY}} +apiVersion: v1 +kind: Service +metadata: + namespace: kube-system + name: kube-proxy + labels: + k8s-app: kube-proxy +spec: + type: ClusterIP + clusterIP: None + ports: + - name: http-metrics + port: 10249 + selector: + component: kube-proxy +{{end}} diff --git a/clusterloader2/pkg/prometheus/manifests/default/prometheus-serviceMonitorKubeProxy.yaml b/clusterloader2/pkg/prometheus/manifests/default/prometheus-serviceMonitorKubeProxy.yaml new file mode 100644 index 0000000000..d99854bd85 --- /dev/null +++ b/clusterloader2/pkg/prometheus/manifests/default/prometheus-serviceMonitorKubeProxy.yaml @@ -0,0 +1,27 @@ +{{$PROMETHEUS_SCRAPE_KUBE_PROXY := DefaultParam .PROMETHEUS_SCRAPE_KUBE_PROXY false}} + +{{if $PROMETHEUS_SCRAPE_KUBE_PROXY}} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + labels: + 
k8s-app: kube-proxy + name: kube-proxy + namespace: monitoring +spec: + endpoints: + # We modify interval depending on the cluster size to avoid collecting too many samples for + # large clusters. Also because the tests run longer in bigger clusters we don't need to + # collect them as often as in smaller clusters. We use 30s interval for small clusters + # (# nodes <= 1000) and 1min interval in big clusters (# nodes > 1000) + # TODO(mm4tt): Once we prove the interval works in big clusters, simplify the expression. + - interval: {{MinInt 60 (MultiplyInt 30 (AddInt 1 (DivideInt .Nodes 1001)))}}s + port: http-metrics + jobLabel: k8s-app + namespaceSelector: + matchNames: + - kube-system + selector: + matchLabels: + k8s-app: kube-proxy +{{end}} diff --git a/clusterloader2/pkg/prometheus/manifests/prometheus-rules.yaml b/clusterloader2/pkg/prometheus/manifests/prometheus-rules.yaml index f945ca533f..2c6c689d21 100644 --- a/clusterloader2/pkg/prometheus/manifests/prometheus-rules.yaml +++ b/clusterloader2/pkg/prometheus/manifests/prometheus-rules.yaml @@ -42,3 +42,20 @@ spec: record: probes:in_cluster_network_latency:histogram_quantile labels: quantile: "0.50" + - name: kube-proxy.rules + rules: + - expr: | + histogram_quantile(0.99, sum(rate(kubeproxy_network_programming_duration_seconds_bucket[5m])) by (le)) + record: kubeproxy:kubeproxy_network_programming_duration:histogram_quantile + labels: + quantile: "0.99" + - expr: | + histogram_quantile(0.90, sum(rate(kubeproxy_network_programming_duration_seconds_bucket[5m])) by (le)) + record: kubeproxy:kubeproxy_network_programming_duration:histogram_quantile + labels: + quantile: "0.90" + - expr: | + histogram_quantile(0.50, sum(rate(kubeproxy_network_programming_duration_seconds_bucket[5m])) by (le)) + record: kubeproxy:kubeproxy_network_programming_duration:histogram_quantile + labels: + quantile: "0.50" diff --git a/clusterloader2/testing/prometheus/scrape-kube-proxy.yaml 
b/clusterloader2/testing/prometheus/scrape-kube-proxy.yaml new file mode 100644 index 0000000000..5334ba16c2 --- /dev/null +++ b/clusterloader2/testing/prometheus/scrape-kube-proxy.yaml @@ -0,0 +1 @@ +PROMETHEUS_SCRAPE_KUBE_PROXY: true