From d5a0cf04c641420243989e560b8f291cb7235c76 Mon Sep 17 00:00:00 2001 From: Matt Matejczyk Date: Wed, 15 May 2019 15:19:01 +0200 Subject: [PATCH 1/2] Disable kube-proxy monitoring. --- .../manifests/default/kube-proxy-service.yaml | 15 ----------- .../prometheus-serviceMonitorKubeProxy.yaml | 25 ------------------- 2 files changed, 40 deletions(-) delete mode 100644 clusterloader2/pkg/prometheus/manifests/default/kube-proxy-service.yaml delete mode 100644 clusterloader2/pkg/prometheus/manifests/default/prometheus-serviceMonitorKubeProxy.yaml diff --git a/clusterloader2/pkg/prometheus/manifests/default/kube-proxy-service.yaml b/clusterloader2/pkg/prometheus/manifests/default/kube-proxy-service.yaml deleted file mode 100644 index b4bd3ddbf7..0000000000 --- a/clusterloader2/pkg/prometheus/manifests/default/kube-proxy-service.yaml +++ /dev/null @@ -1,15 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - namespace: kube-system - name: kube-proxy - labels: - k8s-app: kube-proxy -spec: - type: ClusterIP - clusterIP: None - ports: - - name: http-metrics - port: 10249 - selector: - component: kube-proxy diff --git a/clusterloader2/pkg/prometheus/manifests/default/prometheus-serviceMonitorKubeProxy.yaml b/clusterloader2/pkg/prometheus/manifests/default/prometheus-serviceMonitorKubeProxy.yaml deleted file mode 100644 index 6b1caa4219..0000000000 --- a/clusterloader2/pkg/prometheus/manifests/default/prometheus-serviceMonitorKubeProxy.yaml +++ /dev/null @@ -1,25 +0,0 @@ -apiVersion: monitoring.coreos.com/v1 -kind: ServiceMonitor -metadata: - labels: - k8s-app: kube-proxy - name: kube-proxy - namespace: monitoring -spec: - endpoints: - # We modify interval depending on the cluster size to avoid collecting too many samples for - # large clusters. Also because the tests run longer in bigger clusters we don't need to - # collect them as often as in smaller clusters. For example if in 100 node cluster the test - # takes 10min it's useful to gather metrics every 30s. 
On the other hand if the test takes - # 10h collecting metrics every 5min is still good enough. The expression below should give us - # 30s interval for small clusters (up to 500 nodes), 2.5min interval for 2K node clusters and - # 5.5min interval for 5K node clusters. - - interval: {{MultiplyInt 30 (AddInt 1 (DivideInt .Nodes 500))}}s - port: http-metrics - jobLabel: k8s-app - namespaceSelector: - matchNames: - - kube-system - selector: - matchLabels: - k8s-app: kube-proxy From f53661331c92a9892b0bfcee647ab247de0b8b0c Mon Sep 17 00:00:00 2001 From: Matt Matejczyk Date: Wed, 15 May 2019 15:19:27 +0200 Subject: [PATCH 2/2] Scrape coredns every 10s. Scraping coredns is relatively cheap (e.g. compared to kube-proxy), so we can do it more often. --- .../manifests/default/prometheus-serviceMonitorCoreDNS.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clusterloader2/pkg/prometheus/manifests/default/prometheus-serviceMonitorCoreDNS.yaml b/clusterloader2/pkg/prometheus/manifests/default/prometheus-serviceMonitorCoreDNS.yaml index a454301226..b66861f955 100644 --- a/clusterloader2/pkg/prometheus/manifests/default/prometheus-serviceMonitorCoreDNS.yaml +++ b/clusterloader2/pkg/prometheus/manifests/default/prometheus-serviceMonitorCoreDNS.yaml @@ -8,7 +8,7 @@ metadata: spec: endpoints: - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token - interval: 30s + interval: 10s port: metrics jobLabel: k8s-app namespaceSelector: