From 8c033f81644d1802a9e755f665895481c9890d40 Mon Sep 17 00:00:00 2001 From: Marie Roque Date: Wed, 20 Sep 2023 16:15:05 +0200 Subject: [PATCH 1/4] Adapt scrape targets to EKS clusters --- CHANGELOG.md | 4 + .../input/case-5-cluster-api-v1alpha3.golden | 2 + .../input/case-6-cluster-api-eks.golden | 13 + .../case-6-cluster-api-eks.golden | 488 +++++++++++++++++ .../case-6-cluster-api-eks.golden | 65 +++ .../test/case-6-cluster-api-eks.golden | 6 + .../test/case-6-cluster-api-eks.golden | 32 ++ .../test/case-6-cluster-api-eks.golden | 29 + .../case-6-cluster-api-eks.golden | 32 ++ .../externaldns/case-6-cluster-api-eks.golden | 31 ++ .../case-6-cluster-api-eks.golden | 30 ++ .../test/case-6-cluster-api-eks.golden | 122 +++++ .../test/case-6-cluster-api-eks.golden | 32 ++ .../externaldns/case-6-cluster-api-eks.golden | 34 ++ .../monitoring/scrapeconfigs/resource.go | 84 +-- .../monitoring/scrapeconfigs/resource_test.go | 6 + .../test/aws/case-6-cluster-api-eks.golden | 490 +++++++++++++++++ .../test/azure/case-6-cluster-api-eks.golden | 500 ++++++++++++++++++ .../test/capa/case-6-cluster-api-eks.golden | 444 ++++++++++++++++ .../test/gcp/case-6-cluster-api-eks.golden | 444 ++++++++++++++++ .../test/kvm/case-6-cluster-api-eks.golden | 500 ++++++++++++++++++ .../openstack/case-6-cluster-api-eks.golden | 444 ++++++++++++++++ .../test/case-6-cluster-api-eks.golden | 28 + .../test/case-6-cluster-api-eks.golden | 12 + service/key/key.go | 14 + 25 files changed, 3852 insertions(+), 34 deletions(-) create mode 100644 pkg/unittest/input/case-6-cluster-api-eks.golden create mode 100644 service/controller/resource/alerting/alertmanagerconfig/test/alertmanager-config/case-6-cluster-api-eks.golden create mode 100644 service/controller/resource/alerting/alertmanagerconfig/test/notification-template/case-6-cluster-api-eks.golden create mode 100644 service/controller/resource/alerting/alertmanagerwiring/test/case-6-cluster-api-eks.golden create mode 100644 service/controller/resource/alerting/heartbeatwebhookconfig/test/case-6-cluster-api-eks.golden create mode 100644 service/controller/resource/monitoring/ingress/test/case-6-cluster-api-eks.golden create mode 100644 service/controller/resource/monitoring/ingress/test/externaldns-with-restricted-access/case-6-cluster-api-eks.golden create mode 100644 service/controller/resource/monitoring/ingress/test/externaldns/case-6-cluster-api-eks.golden create mode 100644 service/controller/resource/monitoring/ingress/test/restricted-access/case-6-cluster-api-eks.golden create mode 100644 service/controller/resource/monitoring/prometheus/test/case-6-cluster-api-eks.golden create mode 100644 service/controller/resource/monitoring/remotewriteingress/test/case-6-cluster-api-eks.golden create mode 100644 service/controller/resource/monitoring/remotewriteingress/test/externaldns/case-6-cluster-api-eks.golden create mode 100644 service/controller/resource/monitoring/scrapeconfigs/test/aws/case-6-cluster-api-eks.golden create mode 100644 service/controller/resource/monitoring/scrapeconfigs/test/azure/case-6-cluster-api-eks.golden create mode 100644 service/controller/resource/monitoring/scrapeconfigs/test/capa/case-6-cluster-api-eks.golden create mode 100644 service/controller/resource/monitoring/scrapeconfigs/test/gcp/case-6-cluster-api-eks.golden create mode 100644 service/controller/resource/monitoring/scrapeconfigs/test/kvm/case-6-cluster-api-eks.golden create mode 100644 service/controller/resource/monitoring/scrapeconfigs/test/openstack/case-6-cluster-api-eks.golden create mode 100644 service/controller/resource/monitoring/verticalpodautoscaler/test/case-6-cluster-api-eks.golden create mode 100644 service/controller/resource/namespace/test/case-6-cluster-api-eks.golden diff --git a/CHANGELOG.md b/CHANGELOG.md index 570ee5fc2..e25d952a6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Changed + +- Adapt scrape targets to EKS clusters. + ## [4.48.0] - 2023-09-19 ### Changed diff --git a/pkg/unittest/input/case-5-cluster-api-v1alpha3.golden b/pkg/unittest/input/case-5-cluster-api-v1alpha3.golden index 13217690f..31a56ba5c 100644 --- a/pkg/unittest/input/case-5-cluster-api-v1alpha3.golden +++ b/pkg/unittest/input/case-5-cluster-api-v1alpha3.golden @@ -9,3 +9,5 @@ spec: controlPlaneEndpoint: host: master.baz port: 443 + infrastructureRef: + kind: AWSCluster diff --git a/pkg/unittest/input/case-6-cluster-api-eks.golden b/pkg/unittest/input/case-6-cluster-api-eks.golden new file mode 100644 index 000000000..d70bba102 --- /dev/null +++ b/pkg/unittest/input/case-6-cluster-api-eks.golden @@ -0,0 +1,13 @@ +apiVersion: cluster.x-k8s.io/v1beta1 +kind: Cluster +metadata: + labels: + "release.giantswarm.io/version": 18.0.0 + name: eks-sample + namespace: org-my-organization +spec: + controlPlaneEndpoint: + host: master.eks-sample + port: 443 + infrastructureRef: + kind: AWSManagedCluster diff --git a/service/controller/resource/alerting/alertmanagerconfig/test/alertmanager-config/case-6-cluster-api-eks.golden b/service/controller/resource/alerting/alertmanagerconfig/test/alertmanager-config/case-6-cluster-api-eks.golden new file mode 100644 index 000000000..000b738d0 --- /dev/null +++ b/service/controller/resource/alerting/alertmanagerconfig/test/alertmanager-config/case-6-cluster-api-eks.golden @@ -0,0 +1,488 @@ +global: + resolve_timeout: 5m + slack_api_url: https://slack + +templates: +- '/etc/alertmanager/config/*.tmpl' + +route: + group_by: [alertname, cluster_id, installation, status] + group_interval: 15m + group_wait: 30s + repeat_interval: 4h + receiver: root + + routes: + + # Falco noise Slack + - receiver: falco_noise_slack + matchers: + - alertname=~"Falco.*" + continue: false + + - receiver: team_tinkerers_slack + matchers: + - severity=~"page|notify" + - team="tinkerers" + continue: false + + # Team Ops Opsgenie + - receiver: opsgenie_router + matchers: + - severity="page" + continue: true + + # Service Level slack -- chooses the slack channel based on the provider + - receiver: team_phoenix_slack + matchers: + - alertname="ServiceLevelBurnRateTooHigh" + continue: false + + # Team Atlas Slack + - receiver: team_atlas_slack + matchers: + - severity=~"page|notify" + - team="atlas" + - type!="heartbeat" + - alertname!~"Inhibition.*" + continue: false + + # Team Celestial Slack + - receiver: team_phoenix_slack + matchers: + - severity=~"page|notify" + - team="celestial" + - sloth_severity=~"page|ticket" + continue: false + + # Team Firecracker Slack + - receiver: team_phoenix_slack + matchers: + - severity=~"page|notify" + - team="firecracker" + - sloth_severity=~"page|ticket" + continue: false + + # Team Phoenix Slack + - receiver: team_phoenix_slack + matchers: + - severity=~"page|notify" + - team="phoenix" + - sloth_severity=~"page|ticket" + continue: false + + # Team Shield Slack + - receiver: team_shield_slack + matchers: + - severity=~"page|notify" + - team="shield" + continue: false + + # Team BigMac Slack + - receiver: team_bigmac_slack + matchers: + - severity=~"page|notify" + - team="bigmac" + continue: false + + # Team Clippy Slack + # ReRoute to `phoenix` until we change all team ownership labels + - receiver: team_phoenix_slack + matchers: + - severity=~"page|notify" + - team="clippy" + continue: false + + # Team Rocket Slack + - receiver: team_rocket_slack + matchers: + - severity=~"page|notify" + - team="rocket" + continue: false + + # Team Ops Slack + - receiver: team_ops_slack + matchers: + - severity=~"page|notify" + continue: true + + # Team Turtles Slack + - receiver: team_turtles_slack + matchers: + - severity=~"page|notify" + - team="turtles" + continue: false + +receivers: +- name: root + +- name: falco_noise_slack + slack_configs: + - channel: '#noise-falco' + send_resolved: true + actions: + - type: button + text: ':green_book: OpsRecipe' + url: 'https://intranet.giantswarm.io/docs/support-and-ops/ops-recipes/{{ (index .Alerts 0).Annotations.opsrecipe }}' + style: '{{ if eq .Status "firing" }}primary{{ else }}default{{ end }}' + - type: button + text: ':coffin: Linked PMs' + url: '{{ template "__alert_linked_postmortems" . }}' + - type: button + text: ':mag: Query' + url: '{{ (index .Alerts 0).GeneratorURL }}' + - type: button + text: ':grafana: Dashboard' + url: 'https://grafana/d/{{ (index .Alerts 0).Annotations.dashboard }}' + - type: button + text: ':no_bell: Silence' + url: '{{ template "__alert_silence_link" .}}' + style: '{{ if eq .Status "firing" }}danger{{ else }}default{{ end }}' + +- name: team_atlas_slack + slack_configs: + - channel: '#alert-atlas-test' + send_resolved: true + actions: + - type: button + text: ':green_book: OpsRecipe' + url: 'https://intranet.giantswarm.io/docs/support-and-ops/ops-recipes/{{ (index .Alerts 0).Annotations.opsrecipe }}' + style: '{{ if eq .Status "firing" }}primary{{ else }}default{{ end }}' + - type: button + text: ':coffin: Linked PMs' + url: '{{ template "__alert_linked_postmortems" . }}' + - type: button + text: ':mag: Query' + url: '{{ (index .Alerts 0).GeneratorURL }}' + - type: button + text: ':grafana: Dashboard' + url: 'https://grafana/d/{{ (index .Alerts 0).Annotations.dashboard }}' + - type: button + text: ':no_bell: Silence' + url: '{{ template "__alert_silence_link" .}}' + style: '{{ if eq .Status "firing" }}danger{{ else }}default{{ end }}' + +- name: team_phoenix_slack + slack_configs: + - channel: '#alert-phoenix-test' + send_resolved: true + actions: + - type: button + text: ':green_book: OpsRecipe' + url: 'https://intranet.giantswarm.io/docs/support-and-ops/ops-recipes/{{ (index .Alerts 0).Annotations.opsrecipe }}' + style: '{{ if eq .Status "firing" }}primary{{ else }}default{{ end }}' + - type: button + text: ':coffin: Linked PMs' + url: '{{ template "__alert_linked_postmortems" . }}' + - type: button + text: ':mag: Query' + url: '{{ (index .Alerts 0).GeneratorURL }}' + - type: button + text: ':grafana: Dashboard' + url: 'https://grafana/d/{{ (index .Alerts 0).Annotations.dashboard }}' + - type: button + text: ':no_bell: Silence' + url: '{{ template "__alert_silence_link" . }}' + style: '{{ if eq .Status "firing" }}danger{{ else }}default{{ end }}' + +- name: team_bigmac_slack + slack_configs: + - channel: '#alert-bigmac-test' + send_resolved: true + actions: + - type: button + text: ':green_book: OpsRecipe' + url: 'https://intranet.giantswarm.io/docs/support-and-ops/ops-recipes/{{ (index .Alerts 0).Annotations.opsrecipe }}' + style: '{{ if eq .Status "firing" }}primary{{ else }}default{{ end }}' + - type: button + text: ':coffin: Linked PMs' + url: '{{ template "__alert_linked_postmortems" . }}' + - type: button + text: ':mag: Query' + url: '{{ (index .Alerts 0).GeneratorURL }}' + - type: button + text: ':grafana: Dashboard' + url: 'https://grafana/d/{{ (index .Alerts 0).Annotations.dashboard }}' + - type: button + text: ':no_bell: Silence' + url: '{{ template "__alert_silence_link" . }}' + style: '{{ if eq .Status "firing" }}danger{{ else }}default{{ end }}' + +- name: team_rocket_slack + slack_configs: + - channel: '#alert-rocket-test' + send_resolved: true + actions: + - type: button + text: ':green_book: OpsRecipe' + url: 'https://intranet.giantswarm.io/docs/support-and-ops/ops-recipes/{{ (index .Alerts 0).Annotations.opsrecipe }}' + style: '{{ if eq .Status "firing" }}primary{{ else }}default{{ end }}' + - type: button + text: ':coffin: Linked PMs' + url: '{{ template "__alert_linked_postmortems" . }}' + - type: button + text: ':mag: Query' + url: '{{ (index .Alerts 0).GeneratorURL }}' + - type: button + text: ':grafana: Dashboard' + url: 'https://grafana/d/{{ (index .Alerts 0).Annotations.dashboard }}' + - type: button + text: ':no_bell: Silence' + url: '{{ template "__alert_silence_link" . }}' + style: '{{ if eq .Status "firing" }}danger{{ else }}default{{ end }}' + +- name: team_shield_slack + slack_configs: + - channel: '#alert-shield' + send_resolved: true + actions: + - type: button + text: ':green_book: OpsRecipe' + url: 'https://intranet.giantswarm.io/docs/support-and-ops/ops-recipes/{{ (index .Alerts 0).Annotations.opsrecipe }}' + style: '{{ if eq .Status "firing" }}primary{{ else }}default{{ end }}' + - type: button + text: ':coffin: Linked PMs' + url: '{{ template "__alert_linked_postmortems" . }}' + - type: button + text: ':mag: Query' + url: '{{ (index .Alerts 0).GeneratorURL }}' + - type: button + text: ':grafana: Dashboard' + url: 'https://grafana/d/{{ (index .Alerts 0).Annotations.dashboard }}' + - type: button + text: ':no_bell: Silence' + url: '{{ template "__alert_silence_link" .}}' + style: '{{ if eq .Status "firing" }}danger{{ else }}default{{ end }}' + +- name: team_turtles_slack + slack_configs: + - channel: '#alert-turtles-test' + send_resolved: true + actions: + - type: button + text: ':green_book: OpsRecipe' + url: 'https://intranet.giantswarm.io/docs/support-and-ops/ops-recipes/{{ (index .Alerts 0).Annotations.opsrecipe }}' + style: '{{ if eq .Status "firing" }}primary{{ else }}default{{ end }}' + - type: button + text: ':coffin: Linked PMs' + url: '{{ template "__alert_linked_postmortems" . }}' + - type: button + text: ':mag: Query' + url: '{{ (index .Alerts 0).GeneratorURL }}' + - type: button + text: ':grafana: Dashboard' + url: 'https://grafana/d/{{ (index .Alerts 0).Annotations.dashboard }}' + - type: button + text: ':no_bell: Silence' + url: '{{ template "__alert_silence_link" . }}' + style: '{{ if eq .Status "firing" }}danger{{ else }}default{{ end }}' + +- name: team_tinkerers_slack + slack_configs: + - channel: '#alert-tinkerers' + send_resolved: true + actions: + - type: button + text: ':green_book: OpsRecipe' + url: 'https://intranet.giantswarm.io/docs/support-and-ops/ops-recipes/{{ (index .Alerts 0).Annotations.opsrecipe }}' + style: '{{ if eq .Status "firing" }}primary{{ else }}default{{ end }}' + - type: button + text: ':coffin: Linked PMs' + url: '{{ template "__alert_linked_postmortems" . }}' + - type: button + text: ':mag: Query' + url: '{{ (index .Alerts 0).GeneratorURL }}' + - type: button + text: ':grafana: Dashboard' + url: 'https://grafana/d/{{ (index .Alerts 0).Annotations.dashboard }}' + - type: button + text: ':no_bell: Silence' + url: '{{ template "__alert_silence_link" .}}' + style: '{{ if eq .Status "firing" }}danger{{ else }}default{{ end }}' + +- name: opsgenie_router + opsgenie_configs: + - api_key: opsgenie-key + tags: "{{ (index .Alerts 0).Labels.alertname }},{{ (index .Alerts 0).Labels.cluster_type }},{{ (index .Alerts 0).Labels.severity }},{{ (index .Alerts 0).Labels.team }},{{ (index .Alerts 0).Labels.area }},{{ (index .Alerts 0).Labels.service_priority }},aws,test-installation,testing" + +- name: team_ops_slack + slack_configs: + - channel: '#alert-test-test-installation' + send_resolved: true + actions: + - type: button + text: ':green_book: OpsRecipe' + url: 'https://intranet.giantswarm.io/docs/support-and-ops/ops-recipes/{{ (index .Alerts 0).Annotations.opsrecipe }}' + style: '{{ if eq .Status "firing" }}primary{{ else }}default{{ end }}' + - type: button + text: ':coffin: Linked PMs' + url: '{{ template "__alert_linked_postmortems" . }}' + - type: button + text: ':mag: Query' + url: '{{ (index .Alerts 0).GeneratorURL }}' + - type: button + text: ':grafana: Dashboard' + url: 'https://grafana/d/{{ (index .Alerts 0).Annotations.dashboard }}' + - type: button + text: ':no_bell: Silence' + url: '{{ template "__alert_silence_link" . }}' + style: '{{ if eq .Status "firing" }}danger{{ else }}default{{ end }}' + +inhibit_rules: +- source_matchers: + - inhibit_kube_state_metrics_down=true + target_matchers: + - cancel_if_kube_state_metrics_down=true + equal: [cluster_id] + +- source_matchers: + - inhibit_kube_state_metrics_down=true + - cluster_id=test-installation + target_matchers: + - cancel_if_mc_kube_state_metrics_down=true + +- source_matchers: + - inhibit_kube_state_metrics_down=true + target_matchers: + - cancel_if_any_kube_state_metrics_down=true + +- source_matchers: + - cluster_status_creating=true + target_matchers: + - cancel_if_cluster_status_creating=true + equal: [cluster_id] + +- source_matchers: + - cluster_status_created=true + target_matchers: + - cancel_if_cluster_status_created=true + equal: [cluster_id] + +- source_matchers: + - cluster_status_updating=true + target_matchers: + - cancel_if_cluster_status_updating=true + equal: [cluster_id] + +- source_matchers: + - cluster_status_updated=true + target_matchers: + - cancel_if_cluster_status_updated=true + equal: [cluster_id] + +- source_matchers: + - cluster_status_deleting=true + target_matchers: + - cancel_if_cluster_status_deleting=true + equal: [cluster_id] + +- source_matchers: + - cluster_with_no_nodepools=true + target_matchers: + - cancel_if_cluster_with_no_nodepools=true + equal: [cluster_id] + +- source_matchers: + - cluster_with_scaling_nodepools=true + target_matchers: + - cancel_if_cluster_with_scaling_nodepools=true + equal: [cluster_id] + +- source_matchers: + - cluster_with_notready_nodepools=true + target_matchers: + - cancel_if_cluster_with_notready_nodepools=true + equal: [cluster_id] + +- source_matchers: + - instance_state_not_running=true + target_matchers: + - cancel_if_instance_state_not_running=true + equal: [node] + +- source_matchers: + - kiam_has_errors=true + target_matchers: + - cancel_if_kiam_has_errors=true + equal: [cluster_id] + +- source_matchers: + - kubelet_down=true + target_matchers: + - cancel_if_kubelet_down=true + equal: [cluster_id, ip] + +- source_matchers: + - kubelet_down=true + target_matchers: + - cancel_if_any_kubelet_down=true + equal: [cluster_id] + +- source_matchers: + - kubelet_not_ready=true + target_matchers: + - cancel_if_kubelet_not_ready=true + equal: [cluster_id, ip] + +- source_matchers: + - kubelet_not_ready=true + target_matchers: + - cancel_if_any_kubelet_not_ready=true + equal: [cluster_id] + +- source_matchers: + - nodes_down=true + target_matchers: + - cancel_if_nodes_down=true + equal: [cluster_id] + +- source_matchers: + - scrape_timeout=true + target_matchers: + - cancel_if_scrape_timeout=true + equal: [cluster_id, instance] + +- source_matchers: + - control_plane_node_down=true + target_matchers: + - cancel_if_control_plane_node_down=true + equal: [cluster_id] + +- source_matchers: + - apiserver_down=true + target_matchers: + - cancel_if_apiserver_down=true + equal: [cluster_id] + +- source_matchers: + - apiserver_down=true + target_matchers: + - cancel_if_any_apiserver_down=true + +- source_matchers: + - outside_working_hours=true + target_matchers: + - cancel_if_outside_working_hours=true + +- source_matchers: + - has_worker_nodes=false + target_matchers: + - cancel_if_cluster_has_no_workers=true + equal: [cluster_id] + +- source_matchers: + - cluster_is_not_running_prometheus_agent=true + target_matchers: + - cancel_if_cluster_is_not_running_prometheus_agent=true + equal: [cluster_id] + +- source_matchers: + - inhibit_prometheus_agent_down=true + target_matchers: + - cancel_if_prometheus_agent_down=true + equal: [cluster_id] + +- source_matchers: + - stack_failed=true + target_matchers: + - cancel_if_stack_failed=true diff --git a/service/controller/resource/alerting/alertmanagerconfig/test/notification-template/case-6-cluster-api-eks.golden b/service/controller/resource/alerting/alertmanagerconfig/test/notification-template/case-6-cluster-api-eks.golden new file mode 100644 index 000000000..9b85e2b02 --- /dev/null +++ b/service/controller/resource/alerting/alertmanagerconfig/test/notification-template/case-6-cluster-api-eks.golden @@ -0,0 +1,65 @@ +{{ define "__alertmanager" }}Alertmanager{{ end }} +{{ define "__alertmanagerurl" }}{{ .ExternalURL }}/#/alerts?receiver={{ .Receiver }}&silenced=false&inhibited=false&active=true&filter=%7Balertname%3D%22{{ .CommonLabels.alertname }}%22%7D{{ end }} +{{ define "__dashboardurl" -}}https://grafana/d/{{ (index .Alerts 0).Annotations.dashboard }}{{- end }} +{{ define "__runbookurl" -}}https://intranet.giantswarm.io/docs/support-and-ops/ops-recipes/{{ (index .Alerts 0).Annotations.opsrecipe }}{{- end }} + +{{ define "slack.default.title" }}{{ .Status | toUpper }}[{{ if eq .Status "firing" }}{{ .Alerts.Firing | len }}{{- else }}{{ .Alerts.Resolved | len }}{{- end }}] {{ (index .Alerts 0).Labels.alertname }} - Team {{ (index .Alerts 0).Labels.team }}{{ end }} +{{ define "slack.default.username" }}{{ template "__alertmanager" . }}{{ end }} +{{ define "slack.default.fallback" }}{{ template "slack.default.title" . }} | {{ template "slack.default.titlelink" . }}{{ end }} +{{ define "slack.default.pretext" }}{{ end }} +{{ define "slack.default.titlelink" }}{{ template "__alertmanagerurl" . }}{{ end }} +{{ define "slack.default.iconemoji" }}{{ end }} +{{ define "slack.default.iconurl" }}{{ end }} +{{ define "slack.default.text" }}*Cluster:* {{ (index .Alerts 0).Labels.installation }}{{ if (index .Alerts 0).Labels.cluster_id }} / {{ (index .Alerts 0).Labels.cluster_id }}{{ end }}{{ if (index .Alerts 0).Labels.service }} / {{ (index .Alerts 0).Labels.service }}{{ end }} +*Area:* {{ (index .Alerts 0).Labels.area }} / {{ (index .Alerts 0).Labels.topic }} +{{- if (index .Alerts 0).Annotations.description }} +*Instances* +{{ if eq .Status "firing" }} +{{ range .Alerts.Firing }} +:fire: {{ if .Labels.instance }}{{ .Labels.instance }}: {{ end }}{{ .Annotations.description }}{{- end }} +{{ else }} +{{ range .Alerts.Resolved }} +:success: {{ if .Labels.instance }}{{ .Labels.instance }}: {{ end }}{{ .Annotations.description }}{{- end }} +{{ end }} +{{- end }} +{{ end }} + + +{{ define "opsgenie.default.message" }}{{ .GroupLabels.installation }} / {{ .GroupLabels.cluster_id }}{{ if (index .Alerts 0).Labels.service }} / {{ (index .Alerts 0).Labels.service }}{{ end }} - {{ index (index .Alerts.Firing 0).Labels `alertname`}}{{ end }} +{{ define "opsgenie.default.source" }}{{ template "__alertmanager" . }}{{ end }} +{{ define "opsgenie.default.description" }}* Team: {{ (index .Alerts 0).Labels.team }} +* Area: {{ (index .Alerts 0).Labels.area }} / {{ (index .Alerts 0).Labels.topic }} + +* Instances:{{ range .Alerts.Firing }} +🔥 {{ if .Labels.instance }}{{ .Labels.instance }}: {{ end }}{{ .Annotations.description }}{{ end }} + +--- + +{{ if (index .Alerts 0).Annotations.opsrecipe }}📗 Runbook: {{ template "__runbookurl" . }}{{- end }} +🔔 Alertmanager {{ template "__alertmanagerurl" . }} +{{- if (index .Alerts 0).Annotations.dashboard }}📈 Dashboard: {{ template "__dashboardurl" . }}{{- end }} +👀 Prometheus: {{ (index .Alerts 0).GeneratorURL }} + +--- + +{{ if not (index .Alerts 0).Annotations.opsrecipe }}⚠️ There is no **runbook** for this alert, time to get your pen.{{- end }} +{{ if not (index .Alerts 0).Annotations.dashboard }}⚠️ There is no **dashboard** for this alert, time to sketch.{{- end }} +{{- end }} + +# This builds the silence URL. We exclude the alertname in the range +# to avoid the issue of having trailing comma separator (%2C) at the end +# of the generated URL +{{ define "__alert_silence_link" -}} + {{ .ExternalURL }}/#/silences/new?filter=%7B + {{- range .CommonLabels.SortedPairs -}} + {{- if ne .Name "alertname" -}} + {{- .Name }}%3D"{{- .Value -}}"%2C%20 + {{- end -}} + {{- end -}} + alertname%3D"{{ .CommonLabels.alertname }}"%7D +{{- end }} + +# Link to related PMs +{{ define "__alert_linked_postmortems" -}} +https://github.com/giantswarm/giantswarm/issues?q=is%3Aissue+is%3Aopen+label%3Apostmortem+label%3Aalert%2F{{ .CommonLabels.alertname }} +{{- end }} diff --git a/service/controller/resource/alerting/alertmanagerwiring/test/case-6-cluster-api-eks.golden b/service/controller/resource/alerting/alertmanagerwiring/test/case-6-cluster-api-eks.golden new file mode 100644 index 000000000..423d6dbad --- /dev/null +++ b/service/controller/resource/alerting/alertmanagerwiring/test/case-6-cluster-api-eks.golden @@ -0,0 +1,6 @@ +- static_configs: + - targets: + - alertmanager-operated.monitoring.svc:9093 + scheme: http + timeout: 10s + api_version: v2 diff --git a/service/controller/resource/alerting/heartbeatwebhookconfig/test/case-6-cluster-api-eks.golden b/service/controller/resource/alerting/heartbeatwebhookconfig/test/case-6-cluster-api-eks.golden new file mode 100644 index 000000000..ada1003c2 --- /dev/null +++ b/service/controller/resource/alerting/heartbeatwebhookconfig/test/case-6-cluster-api-eks.golden @@ -0,0 +1,32 @@ +metadata: + creationTimestamp: null + labels: + app.kubernetes.io/instance: alertmanager + app.kubernetes.io/managed-by: prometheus-meta-operator + app.kubernetes.io/name: alertmanager + name: eks-sample + namespace: monitoring +spec: + receivers: + - name: heartbeat_test-installation_eks-sample + webhookConfigs: + - httpConfig: + authorization: + credentials: + key: opsGenieApiKey + name: alertmanager-global + type: GenieKey + sendResolved: false + url: https://api.opsgenie.com/v2/heartbeats/test-installation-eks-sample/ping + route: + groupInterval: 30s + groupWait: 30s + matchers: + - name: cluster_id + value: eks-sample + - name: installation + value: test-installation + - name: type + value: heartbeat + receiver: heartbeat_test-installation_eks-sample + repeatInterval: 15m diff --git a/service/controller/resource/monitoring/ingress/test/case-6-cluster-api-eks.golden b/service/controller/resource/monitoring/ingress/test/case-6-cluster-api-eks.golden new file mode 100644 index 000000000..8b7e19125 --- /dev/null +++ b/service/controller/resource/monitoring/ingress/test/case-6-cluster-api-eks.golden @@ -0,0 +1,29 @@ +apiVersion: v1 +kind: Ingress +metadata: + annotations: + nginx.ingress.kubernetes.io/auth-signin: https://$host/oauth2/start?rd=$escaped_request_uri + nginx.ingress.kubernetes.io/auth-url: https://$host/oauth2/auth + creationTimestamp: null + labels: + app.kubernetes.io/instance: eks-sample + app.kubernetes.io/managed-by: prometheus-meta-operator + app.kubernetes.io/name: prometheus + giantswarm.io/cluster: eks-sample + name: prometheus-eks-sample + namespace: eks-sample-prometheus +spec: + ingressClassName: nginx + rules: + - host: prometheus + http: + paths: + - backend: + service: + name: prometheus-operated + port: + number: 9090 + path: /eks-sample + pathType: ImplementationSpecific +status: + loadBalancer: {} diff --git a/service/controller/resource/monitoring/ingress/test/externaldns-with-restricted-access/case-6-cluster-api-eks.golden b/service/controller/resource/monitoring/ingress/test/externaldns-with-restricted-access/case-6-cluster-api-eks.golden new file mode 100644 index 000000000..4f2c8fbaa --- /dev/null +++ b/service/controller/resource/monitoring/ingress/test/externaldns-with-restricted-access/case-6-cluster-api-eks.golden @@ -0,0 +1,32 @@ +apiVersion: v1 +kind: Ingress +metadata: + annotations: + external-dns.alpha.kubernetes.io/hostname: prometheus.3lkdj.test.gigantic.io + giantswarm.io/external-dns: managed + nginx.ingress.kubernetes.io/auth-signin: https://$host/oauth2/start?rd=$escaped_request_uri + nginx.ingress.kubernetes.io/auth-url: https://$host/oauth2/auth + nginx.ingress.kubernetes.io/whitelist-source-range: 21.10.178/24 + creationTimestamp: null + labels: + app.kubernetes.io/instance: eks-sample + app.kubernetes.io/managed-by: prometheus-meta-operator + app.kubernetes.io/name: prometheus + giantswarm.io/cluster: eks-sample + name: prometheus-eks-sample + namespace: eks-sample-prometheus +spec: + ingressClassName: nginx + rules: + - host: prometheus.3lkdj.test.gigantic.io + http: + paths: + - backend: + service: + name: prometheus-operated + port: + number: 9090 + path: /eks-sample + pathType: ImplementationSpecific +status: + loadBalancer: {} diff --git a/service/controller/resource/monitoring/ingress/test/externaldns/case-6-cluster-api-eks.golden b/service/controller/resource/monitoring/ingress/test/externaldns/case-6-cluster-api-eks.golden new file mode 100644 index 000000000..0e6a5794a --- /dev/null +++ b/service/controller/resource/monitoring/ingress/test/externaldns/case-6-cluster-api-eks.golden @@ -0,0 +1,31 @@ +apiVersion: v1 +kind: Ingress +metadata: + annotations: + external-dns.alpha.kubernetes.io/hostname: prometheus + giantswarm.io/external-dns: managed + nginx.ingress.kubernetes.io/auth-signin: https://$host/oauth2/start?rd=$escaped_request_uri + nginx.ingress.kubernetes.io/auth-url: https://$host/oauth2/auth + creationTimestamp: null + labels: + app.kubernetes.io/instance: eks-sample + app.kubernetes.io/managed-by: prometheus-meta-operator + app.kubernetes.io/name: prometheus + giantswarm.io/cluster: eks-sample + name: prometheus-eks-sample + namespace: eks-sample-prometheus +spec: + ingressClassName: nginx + rules: + - host: prometheus + http: + paths: + - backend: + service: + name: prometheus-operated + port: + number: 9090 + path: /eks-sample + pathType: ImplementationSpecific +status: + loadBalancer: {} diff --git a/service/controller/resource/monitoring/ingress/test/restricted-access/case-6-cluster-api-eks.golden b/service/controller/resource/monitoring/ingress/test/restricted-access/case-6-cluster-api-eks.golden new file mode 100644 index 000000000..45edc7a80 --- /dev/null +++ b/service/controller/resource/monitoring/ingress/test/restricted-access/case-6-cluster-api-eks.golden @@ -0,0 +1,30 @@ +apiVersion: v1 +kind: Ingress +metadata: + annotations: + nginx.ingress.kubernetes.io/auth-signin: https://$host/oauth2/start?rd=$escaped_request_uri + nginx.ingress.kubernetes.io/auth-url: https://$host/oauth2/auth + nginx.ingress.kubernetes.io/whitelist-source-range: 21.10.178/24 + creationTimestamp: null + labels: + app.kubernetes.io/instance: eks-sample + app.kubernetes.io/managed-by: prometheus-meta-operator + app.kubernetes.io/name: prometheus + giantswarm.io/cluster: eks-sample + name: prometheus-eks-sample + namespace: eks-sample-prometheus +spec: + ingressClassName: nginx + rules: + - host: prometheus + http: + paths: + - backend: + service: + name: prometheus-operated + port: + number: 9090 + path: /eks-sample + pathType: ImplementationSpecific +status: + loadBalancer: {} diff --git a/service/controller/resource/monitoring/prometheus/test/case-6-cluster-api-eks.golden b/service/controller/resource/monitoring/prometheus/test/case-6-cluster-api-eks.golden new file mode 100644 index 000000000..0781d34ec --- /dev/null +++ b/service/controller/resource/monitoring/prometheus/test/case-6-cluster-api-eks.golden @@ -0,0 +1,122 @@ +metadata: + creationTimestamp: null + labels: + app.kubernetes.io/instance: eks-sample + app.kubernetes.io/managed-by: prometheus-meta-operator + app.kubernetes.io/name: prometheus + giantswarm.io/cluster: eks-sample + name: eks-sample + namespace: eks-sample-prometheus +spec: + additionalAlertManagerConfigs: + key: alertmanager-additional.yaml + name: alertmanager-config + additionalScrapeConfigs: + key: prometheus-additional.yaml + name: additional-scrape-configs + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: node-role.kubernetes.io/control-plane + operator: DoesNotExist + apiserverConfig: + bearerTokenFile: /etc/prometheus/secrets/cluster-certificates/token + host: https://master.eks-sample:443 + tlsConfig: + ca: {} + caFile: /etc/prometheus/secrets/cluster-certificates/ca + cert: {} + arbitraryFSAccessThroughSMs: {} + enableFeatures: + - remote-write-receiver + evaluationInterval: 60s + externalLabels: + cluster_id: eks-sample + cluster_type: workload_cluster + customer: Giant Swarm + installation: test-installation + pipeline: testing + provider: provider + region: onprem + externalUrl: http://prometheus/eks-sample + image: quay.io/giantswarm/prometheus:v2.28.1 + logLevel: debug + podMetadata: + labels: + app.kubernetes.io/instance: eks-sample + app.kubernetes.io/managed-by: prometheus-meta-operator + app.kubernetes.io/name: prometheus + giantswarm.io/cluster: eks-sample + giantswarm.io/monitoring: "true" + priorityClassName: prometheus + replicas: 1 + resources: + limits: + cpu: 150m + memory: "1073741824" + requests: + cpu: 100m + memory: "1073741824" + retention: 2w + retentionSize: 85GiB + routePrefix: /eks-sample + ruleNamespaceSelector: + matchExpressions: + - key: kubernetes.io/metadata.name + operator: Exists + ruleSelector: + matchExpressions: + - key: cluster_type + operator: NotIn + values: + - management_cluster + - key: application.giantswarm.io/team + operator: Exists + rules: + alert: {} + scrapeInterval: 60s + secrets: + - cluster-certificates + securityContext: + fsGroup: 2000 + runAsGroup: 65534 + runAsNonRoot: true + runAsUser: 1000 + serviceMonitorNamespaceSelector: + matchExpressions: + - key: nonexistentkey + operator: Exists + serviceMonitorSelector: + matchExpressions: + - key: nonexistentkey + operator: Exists + storage: + volumeClaimTemplate: + metadata: {} + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 100Gi + status: {} + topologySpreadConstraints: + - labelSelector: + matchLabels: + app.kubernetes.io/name: prometheus + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + tsdb: {} + version: v2.28.1 + walCompression: true + web: + pageTitle: test-installation/eks-sample Prometheus +status: + availableReplicas: 0 + paused: false + replicas: 0 + unavailableReplicas: 0 + updatedReplicas: 0 diff --git a/service/controller/resource/monitoring/remotewriteingress/test/case-6-cluster-api-eks.golden b/service/controller/resource/monitoring/remotewriteingress/test/case-6-cluster-api-eks.golden new file mode 100644 index 000000000..8c483e42c --- /dev/null +++ b/service/controller/resource/monitoring/remotewriteingress/test/case-6-cluster-api-eks.golden @@ -0,0 +1,32 @@ +apiVersion: v1 +kind: Ingress +metadata: + annotations: + nginx.ingress.kubernetes.io/auth-realm: Authentication Required + nginx.ingress.kubernetes.io/auth-secret: remote-write-ingress-auth + nginx.ingress.kubernetes.io/auth-type: basic + nginx.ingress.kubernetes.io/client-body-buffer-size: 50m + nginx.ingress.kubernetes.io/proxy-body-size: 50m + creationTimestamp: null + labels: + app.kubernetes.io/instance: eks-sample + app.kubernetes.io/managed-by: prometheus-meta-operator + app.kubernetes.io/name: prometheus + giantswarm.io/cluster: eks-sample + name: prometheus-eks-sample-remote-write + namespace: eks-sample-prometheus +spec: + ingressClassName: nginx + rules: + - host: prometheus + http: + paths: + - backend: + service: + name: prometheus-operated + port: + number: 9090 + path: /eks-sample/api/v1/write + pathType: ImplementationSpecific +status: + loadBalancer: {} diff --git a/service/controller/resource/monitoring/remotewriteingress/test/externaldns/case-6-cluster-api-eks.golden b/service/controller/resource/monitoring/remotewriteingress/test/externaldns/case-6-cluster-api-eks.golden new file mode 100644 index 000000000..73e671d42 --- /dev/null +++ b/service/controller/resource/monitoring/remotewriteingress/test/externaldns/case-6-cluster-api-eks.golden @@ -0,0 +1,34 @@ +apiVersion: v1 +kind: Ingress +metadata: + annotations: + external-dns.alpha.kubernetes.io/hostname: prometheus + giantswarm.io/external-dns: managed + nginx.ingress.kubernetes.io/auth-realm: Authentication Required + nginx.ingress.kubernetes.io/auth-secret: remote-write-ingress-auth + nginx.ingress.kubernetes.io/auth-type: basic + nginx.ingress.kubernetes.io/client-body-buffer-size: 50m + nginx.ingress.kubernetes.io/proxy-body-size: 50m + creationTimestamp: null + labels: + app.kubernetes.io/instance: eks-sample + app.kubernetes.io/managed-by: prometheus-meta-operator + app.kubernetes.io/name: prometheus + giantswarm.io/cluster: eks-sample + name: prometheus-eks-sample-remote-write + namespace: eks-sample-prometheus +spec: + ingressClassName: nginx + rules: + - host: prometheus + http: + paths: + - backend: + service: + name: prometheus-operated + port: + number: 9090 + path: /eks-sample/api/v1/write + pathType: ImplementationSpecific +status: + loadBalancer: {} diff --git a/service/controller/resource/monitoring/scrapeconfigs/resource.go b/service/controller/resource/monitoring/scrapeconfigs/resource.go index cdf40c554..7346ebc0e 100644 --- a/service/controller/resource/monitoring/scrapeconfigs/resource.go +++ b/service/controller/resource/monitoring/scrapeconfigs/resource.go @@ -262,48 +262,64 @@ func getObservabilityBundleAppVersion(ctx context.Context, ctrlClient client.Cli func listTargetsToIgnore(ctx context.Context, ctrlClient client.Client, cluster metav1.Object, config Config) ([]string, error) { ignoredTargets := make([]string, 0) - appVersion, err := getObservabilityBundleAppVersion(ctx, ctrlClient, cluster, config) - if err != nil { - return nil, microerror.Mask(err) - } + if key.IsEKSCluster(cluster) { + // In case of EKS clusters, we assume scraping targets via ServiceMonitors, + // so we ignore them from the Prometheus scrape config + config.Logger.Debugf(ctx, "EKS cluster: ignoring all scraping targets in Prometheus scrape config") + ignoredTargets = append(ignoredTargets, + "prometheus-operator-app", + "kube-apiserver", + "kube-controller-manager", + "kube-scheduler", + "node-exporter", + "kubelet", + "coredns", + "kube-state-metrics", + "etcd") + } else { + appVersion, err := getObservabilityBundleAppVersion(ctx, ctrlClient, cluster, config) + if err != nil { + return nil, microerror.Mask(err) + } - version, err := semver.Parse(appVersion) - if err != nil { - return nil, microerror.Mask(err) - } + version, err := semver.Parse(appVersion) + if err != nil { + return nil, microerror.Mask(err) + } - initialBundleVersion, err := semver.Parse("0.1.0") - if err != nil { - return nil, microerror.Mask(err) - } + initialBundleVersion, err := semver.Parse("0.1.0") + if err != nil { + return nil, microerror.Mask(err) + } - bundleWithKSMAndExportersVersion, err := semver.Parse("0.4.0") - if err != nil { - return nil, microerror.Mask(err) - } + bundleWithKSMAndExportersVersion, err := semver.Parse("0.4.0") + if err != nil { + return nil, microerror.Mask(err) + } - if version.GTE(initialBundleVersion) { - ignoredTargets = append(ignoredTargets, "prometheus-operator-app", "kube-apiserver", "kube-controller-manager", "kube-scheduler", "node-exporter") - } + if version.GTE(initialBundleVersion) { + ignoredTargets = append(ignoredTargets, "prometheus-operator-app", "kube-apiserver", "kube-controller-manager", "kube-scheduler", "node-exporter") + } - if version.GTE(bundleWithKSMAndExportersVersion) { - ignoredTargets = append(ignoredTargets, "kubelet", "coredns", "kube-state-metrics") + if version.GTE(bundleWithKSMAndExportersVersion) { + ignoredTargets = append(ignoredTargets, "kubelet", "coredns", "kube-state-metrics") - if key.IsCAPIManagementCluster(config.Provider) { - ignoredTargets = append(ignoredTargets, "etcd") + if key.IsCAPIManagementCluster(config.Provider) { + ignoredTargets = append(ignoredTargets, "etcd") + } } - } - // Vintage WC - if !key.IsCAPIManagementCluster(config.Provider) && !key.IsManagementCluster(config.Installation, cluster) { - // Since 18.0.0 we cannot scrape k8s endpoints externally so we ignore those targets. - release := cluster.GetLabels()["release.giantswarm.io/version"] - version, err := semver.Parse(release) - if err != nil { - return nil, microerror.Mask(err) - } - if version.Major >= 18 { - ignoredTargets = append(ignoredTargets, "kube-controller-manager", "kube-scheduler") + // Vintage WC + if !key.IsCAPIManagementCluster(config.Provider) && !key.IsManagementCluster(config.Installation, cluster) { + // Since 18.0.0 we cannot scrape k8s endpoints externally so we ignore those targets. + release := cluster.GetLabels()["release.giantswarm.io/version"] + version, err := semver.Parse(release) + if err != nil { + return nil, microerror.Mask(err) + } + if version.Major >= 18 { + ignoredTargets = append(ignoredTargets, "kube-controller-manager", "kube-scheduler") + } } } return ignoredTargets, nil diff --git a/service/controller/resource/monitoring/scrapeconfigs/resource_test.go b/service/controller/resource/monitoring/scrapeconfigs/resource_test.go index f84ce2973..86d73ae63 100644 --- a/service/controller/resource/monitoring/scrapeconfigs/resource_test.go +++ b/service/controller/resource/monitoring/scrapeconfigs/resource_test.go @@ -119,6 +119,7 @@ func TestAWSScrapeconfigs(t *testing.T) { K8sClient: k8sClient, Vault: "vault1.some-installation.test", Installation: "test-installation", + Logger: logger, } return toData(context.Background(), client, v, config) } @@ -220,6 +221,7 @@ func TestAzureScrapeconfigs(t *testing.T) { K8sClient: k8sClient, Vault: "vault1.some-installation.test", Installation: "test-installation", + Logger: logger, } return toData(context.Background(), client, v, config) } @@ -322,6 +324,7 @@ func TestKVMScrapeconfigs(t *testing.T) { K8sClient: k8sClient, Vault: "vault1.some-installation.test", Installation: "test-installation", + Logger: logger, } return toData(context.Background(), client, v, config) } @@ -444,6 +447,7 @@ func TestOpenStackScrapeconfigs(t *testing.T) { K8sClient: k8sClient, Vault: "vault1.some-installation.test", Installation: "test-installation", + Logger: logger, } return toData(context.Background(), client, v, config) } @@ -566,6 +570,7 @@ func TestGCPScrapeconfigs(t *testing.T) { K8sClient: k8sClient, Vault: "vault1.some-installation.test", Installation: "test-installation", + Logger: logger, } return toData(context.Background(), client, v, config) } @@ -688,6 +693,7 @@ func TestCAPAScrapeconfigs(t *testing.T) { K8sClient: k8sClient, Vault: "vault1.some-installation.test", Installation: "test-installation", + Logger: logger, } return toData(context.Background(), client, v, config) } diff --git a/service/controller/resource/monitoring/scrapeconfigs/test/aws/case-6-cluster-api-eks.golden b/service/controller/resource/monitoring/scrapeconfigs/test/aws/case-6-cluster-api-eks.golden new file mode 100644 index 000000000..48f080cf6 --- /dev/null +++ b/service/controller/resource/monitoring/scrapeconfigs/test/aws/case-6-cluster-api-eks.golden @@ -0,0 +1,490 @@ + +# Add scrape configuration for docker +- job_name: eks-sample-prometheus/docker-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: node + api_server: https://master.eks-sample:443 + bearer_token_file: /etc/prometheus/secrets/cluster-certificates/token + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + insecure_skip_verify: false + bearer_token_file: /etc/prometheus/secrets/cluster-certificates/token + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + insecure_skip_verify: false + relabel_configs: + - target_label: __address__ + replacement: master.eks-sample:443 + - source_labels: [__meta_kubernetes_node_name] + target_label: __metrics_path__ + replacement: /api/v1/nodes/${1}:9323/proxy/metrics + - target_label: app + replacement: docker + - source_labels: [__meta_kubernetes_node_address_InternalIP] + replacement: ${1}:9323 + target_label: instance + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: aws + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role + metric_relabel_configs: + - source_labels: [__name__] + regex: (engine_daemon_image_actions_seconds_count|process_virtual_memory_bytes|process_resident_memory_bytes) + action: keep +# calico-node +- job_name: eks-sample-prometheus/calico-node-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: pod + api_server: https://master.eks-sample:443 + bearer_token_file: /etc/prometheus/secrets/cluster-certificates/token + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + insecure_skip_verify: false + bearer_token_file: /etc/prometheus/secrets/cluster-certificates/token + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + insecure_skip_verify: false + relabel_configs: + - source_labels: [__address__] + replacement: ${1}:9091 + target_label: instance + - target_label: __address__ + replacement: master.eks-sample:443 + - source_labels: [__meta_kubernetes_pod_name] + regex: (calico-node.*) + target_label: __metrics_path__ + replacement: /api/v1/namespaces/kube-system/pods/${1}:9091/proxy/metrics + - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_pod_name] + regex: kube-system;calico-node.* + action: keep + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: app + # Add namespace label. + - source_labels: [__meta_kubernetes_namespace] + target_label: namespace + # Add pod label. + - source_labels: [__meta_kubernetes_pod_name] + target_label: pod + # Add container label. + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: container + # Add node label. + - source_labels: [__meta_kubernetes_pod_node_name] + target_label: node + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: aws + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo +# kube-proxy +- job_name: eks-sample-prometheus/kube-proxy-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: pod + namespaces: + names: + - kube-system + api_server: https://master.eks-sample:443 + bearer_token_file: /etc/prometheus/secrets/cluster-certificates/token + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + insecure_skip_verify: false + bearer_token_file: /etc/prometheus/secrets/cluster-certificates/token + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + insecure_skip_verify: true + relabel_configs: + - source_labels: [__address__] + replacement: $1:10249 + target_label: instance + - source_labels: [__meta_kubernetes_pod_name] + regex: (kube-proxy.*) + action: keep + - target_label: __address__ + replacement: master.eks-sample:443 + - source_labels: [__meta_kubernetes_pod_name] + regex: (kube-proxy.*) + target_label: __metrics_path__ + replacement: /api/v1/namespaces/kube-system/pods/${1}:10249/proxy/metrics + - target_label: app + replacement: kube-proxy + # Add namespace label. + - source_labels: [__meta_kubernetes_namespace] + target_label: namespace + # Add pod label. + - source_labels: [__meta_kubernetes_pod_name] + target_label: pod + # Add container label. + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: container + # Add node label. + - source_labels: [__meta_kubernetes_pod_node_name] + target_label: node + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: aws + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo + metric_relabel_configs: + # drop unused rest client metrics + - source_labels: [__name__] + regex: rest_client_(rate_limiter_duration_seconds_bucket|request_size_bytes_bucket|response_size_bytes_bucket) + action: drop +# cert-exporter +- job_name: eks-sample-prometheus/cert-exporter-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: endpoints + namespaces: + names: + - kube-system + api_server: https://master.eks-sample:443 + bearer_token_file: /etc/prometheus/secrets/cluster-certificates/token + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + insecure_skip_verify: false + bearer_token_file: /etc/prometheus/secrets/cluster-certificates/token + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + insecure_skip_verify: true + relabel_configs: + - source_labels: [__address__] + target_label: instance + - source_labels: [__meta_kubernetes_service_label_app] + regex: cert-exporter + action: keep + - target_label: __address__ + replacement: master.eks-sample:443 + - source_labels: [__meta_kubernetes_pod_name] + regex: (cert-exporter.*) + target_label: __metrics_path__ + replacement: /api/v1/namespaces/kube-system/pods/${1}:9005/proxy/metrics + - source_labels: [__meta_kubernetes_service_label_app] + target_label: app + - source_labels: [__meta_kubernetes_service_annotationpresent_giantswarm_io_monitoring, __meta_kubernetes_service_labelpresent_giantswarm_io_monitoring] + regex: .*(true).* + action: drop + # Add namespace label. + - source_labels: [__meta_kubernetes_namespace] + target_label: namespace + # Add pod label. + - source_labels: [__meta_kubernetes_pod_name] + target_label: pod + # Add container label. + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: container + # Add node label. + - source_labels: [__meta_kubernetes_pod_node_name] + target_label: node + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: aws + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo +- job_name: eks-sample-prometheus/workload-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: endpoints + api_server: https://master.eks-sample:443 + bearer_token_file: /etc/prometheus/secrets/cluster-certificates/token + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + insecure_skip_verify: false + bearer_token_file: /etc/prometheus/secrets/cluster-certificates/token + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + insecure_skip_verify: true + relabel_configs: + - source_labels: [__meta_kubernetes_service_annotationpresent_giantswarm_io_monitoring, __meta_kubernetes_service_labelpresent_giantswarm_io_monitoring] + regex: .*(true).* + action: keep + # if __meta_kubernetes_service_annotation_giantswarm_io_monitoring_path is present, we use it as the metrics path + - source_labels: [__meta_kubernetes_service_annotation_giantswarm_io_monitoring_path] + action: replace + target_label: __metrics_path__ + regex: (.+) + # if __meta_kubernetes_service_annotation_giantswarm_io_monitoring_port, we use it as the metrics port + - source_labels: [__address__, __meta_kubernetes_service_annotation_giantswarm_io_monitoring_port] + action: replace + target_label: __address__ + regex: ([^:]+):(\d+);(\d+) + replacement: $1:$3 + # if the protocol is empty, we set it to http by default, this allows to override the protocol for services using https like prometheus operator + - source_labels: [__address__, __meta_kubernetes_service_annotation_giantswarm_io_monitoring_protocol] + action: replace + target_label: __meta_kubernetes_service_annotation_giantswarm_io_monitoring_protocol + regex: (.*); + replacement: "http" + - source_labels: [__meta_kubernetes_pod_ip, __address__] + regex: (.*);([^:]+):(\d+) + replacement: $1:$3 + target_label: instance + - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_annotation_giantswarm_io_monitoring_protocol, __meta_kubernetes_pod_name, __address__, __metrics_path__] + regex: (.*);(.*);(.*);(.+:)(\d+);(.*) + target_label: __metrics_path__ + replacement: /api/v1/namespaces/${1}/pods/${2}:${3}:${5}/proxy${6} + action: replace + - regex: (.*) + target_label: __address__ + replacement: master.eks-sample:443 + action: replace + - source_labels: [__meta_kubernetes_service_name] + regex: (.*) + target_label: app + action: replace + - source_labels: [__meta_kubernetes_service_annotation_giantswarm_io_monitoring_app_label] + regex: (.+) + target_label: app + action: replace + # Add namespace label. + - source_labels: [__meta_kubernetes_namespace] + target_label: namespace + # Add pod label. + - source_labels: [__meta_kubernetes_pod_name] + target_label: pod + # Add container label. + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: container + # Add node label. + - source_labels: [__meta_kubernetes_pod_node_name] + target_label: node + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: aws + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo + metric_relabel_configs: + - source_labels: [container] + regex: prometheus-operator-app + action: drop + - source_labels: [app] + regex: coredns + action: drop + - source_labels: [app] + regex: kube-state-metrics + action: drop + # drop unused nginx metrics with the highest cardinality as they increase Prometheus memory usage + - source_labels: [__name__] + regex: nginx_ingress_controller_(bytes_sent_bucket|request_size_bucket|response_duration_seconds_bucket|response_size_bucket|request_duration_seconds_count|connect_duration_seconds_bucket|header_duration_seconds_bucket|bytes_sent_count|request_duration_seconds_sum|bytes_sent_sum|request_size_count|response_size_count|response_duration_seconds_sum|response_duration_seconds_count|ingress_upstream_latency_seconds|ingress_upstream_latency_seconds_sum|ingress_upstream_latency_seconds_count) + action: drop + # drop unused kong metrics with the highest cardinality as they increase Prometheus memory usage + - source_labels: [__name__] + regex: kong_(upstream_target_health|latency_bucket|latency_count|latency_sum) + action: drop + # drop unused kube-state-metrics metrics with the highest cardinality as they increase Prometheus memory usage + - source_labels: [__name__] + regex: kube_(.+_annotations|secret_type|pod_status_qos_class|pod_tolerations|pod_status_scheduled|replicaset_metadata_generation|replicaset_status_observed_generation|replicaset_annotations|replicaset_status_fully_labeled_replicas|.+_metadata_resource_version) + action: drop + # drop unused promtail/loki metrics + - source_labels: [__name__] + regex: promtail_request_duration_seconds_bucket|loki_request_duration_seconds_bucket + action: drop + # drop unused rest client metrics + - source_labels: [__name__] + regex: rest_client_(rate_limiter_duration_seconds_bucket|request_size_bytes_bucket|response_size_bytes_bucket) + action: drop + # drop image_id label from kube-state-metrics + - source_labels: [app,image_id] + separator: ; + regex: kube-state-metrics;(.+) + replacement: "" + action: replace + target_label: image_id + - source_labels: [app,deployment] + separator: ; + regex: kube-state-metrics;(.+) + target_label: workload_type + replacement: deployment + action: replace + - source_labels: [app,daemonset] + separator: ; + regex: kube-state-metrics;(.+) + target_label: workload_type + replacement: daemonset + action: replace + - source_labels: [app,statefulset] + separator: ; + regex: kube-state-metrics;(.+) + target_label: workload_type + replacement: statefulset + action: replace + - source_labels: [app,deployment] + separator: ; + regex: kube-state-metrics;(.+) + target_label: workload_name + replacement: ${1} + action: replace + - source_labels: [app,daemonset] + separator: ; + regex: kube-state-metrics;(.+) + target_label: workload_name + replacement: ${1} + action: replace + - source_labels: [app,statefulset] + separator: ; + regex: kube-state-metrics;(.+) + target_label: workload_name + replacement: ${1} + action: replace + - source_labels: [app,label_topology_kubernetes_io_region] + separator: ; + regex: kube-state-metrics;(.+) + target_label: region + replacement: ${1} + action: replace + - source_labels: [app,label_topology_kubernetes_io_zone] + separator: ; + regex: kube-state-metrics;(.+) + target_label: zone + replacement: ${1} + action: replace + - action: labeldrop + regex: label_topology_kubernetes_io_region|label_topology_kubernetes_io_zone + # Override with label for AWS clusters if exists. + - source_labels: [app,label_giantswarm_io_machine_deployment] + regex: kube-state-metrics;(.+) + target_label: nodepool + replacement: ${1} + action: replace + # Override with label for Azure clusters if exists. + - source_labels: [app,label_giantswarm_io_machine_pool] + regex: kube-state-metrics;(.+) + target_label: nodepool + replacement: ${1} + action: replace + - action: labeldrop + regex: label_giantswarm_io_machine_pool|label_giantswarm_io_machine_deployment +# prometheus +- job_name: eks-sample-prometheus/prometheus-eks-sample/0 + honor_labels: true + scheme: http + metrics_path: /eks-sample/metrics + static_configs: + - targets: ['localhost:9090'] + relabel_configs: + - replacement: prometheus + target_label: app + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: aws + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo diff --git a/service/controller/resource/monitoring/scrapeconfigs/test/azure/case-6-cluster-api-eks.golden b/service/controller/resource/monitoring/scrapeconfigs/test/azure/case-6-cluster-api-eks.golden new file mode 100644 index 000000000..d573fa28b --- /dev/null +++ b/service/controller/resource/monitoring/scrapeconfigs/test/azure/case-6-cluster-api-eks.golden @@ -0,0 +1,500 @@ + +# Add scrape configuration for docker +- job_name: eks-sample-prometheus/docker-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: node + api_server: https://master.eks-sample:443 + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + relabel_configs: + - target_label: __address__ + replacement: master.eks-sample:443 + - source_labels: [__meta_kubernetes_node_name] + target_label: __metrics_path__ + replacement: /api/v1/nodes/${1}:9323/proxy/metrics + - target_label: app + replacement: docker + - source_labels: [__meta_kubernetes_node_address_InternalIP] + replacement: ${1}:9323 + target_label: instance + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: azure + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role + metric_relabel_configs: + - source_labels: [__name__] + regex: (engine_daemon_image_actions_seconds_count|process_virtual_memory_bytes|process_resident_memory_bytes) + action: keep +# calico-node +- job_name: eks-sample-prometheus/calico-node-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: pod + api_server: https://master.eks-sample:443 + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + relabel_configs: + - source_labels: [__address__] + replacement: ${1}:9091 + target_label: instance + - target_label: __address__ + replacement: master.eks-sample:443 + - source_labels: [__meta_kubernetes_pod_name] + regex: (calico-node.*) + target_label: __metrics_path__ + replacement: /api/v1/namespaces/kube-system/pods/${1}:9091/proxy/metrics + - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_pod_name] + regex: kube-system;calico-node.* + action: keep + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: app + # Add namespace label. + - source_labels: [__meta_kubernetes_namespace] + target_label: namespace + # Add pod label. + - source_labels: [__meta_kubernetes_pod_name] + target_label: pod + # Add container label. + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: container + # Add node label. + - source_labels: [__meta_kubernetes_pod_node_name] + target_label: node + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: azure + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo +# kube-proxy +- job_name: eks-sample-prometheus/kube-proxy-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: pod + namespaces: + names: + - kube-system + api_server: https://master.eks-sample:443 + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: true + relabel_configs: + - source_labels: [__address__] + replacement: $1:10249 + target_label: instance + - source_labels: [__meta_kubernetes_pod_name] + regex: (kube-proxy.*) + action: keep + - target_label: __address__ + replacement: master.eks-sample:443 + - source_labels: [__meta_kubernetes_pod_name] + regex: (kube-proxy.*) + target_label: __metrics_path__ + replacement: /api/v1/namespaces/kube-system/pods/${1}:10249/proxy/metrics + - target_label: app + replacement: kube-proxy + # Add namespace label. + - source_labels: [__meta_kubernetes_namespace] + target_label: namespace + # Add pod label. + - source_labels: [__meta_kubernetes_pod_name] + target_label: pod + # Add container label. + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: container + # Add node label. + - source_labels: [__meta_kubernetes_pod_node_name] + target_label: node + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: azure + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo + metric_relabel_configs: + # drop unused rest client metrics + - source_labels: [__name__] + regex: rest_client_(rate_limiter_duration_seconds_bucket|request_size_bytes_bucket|response_size_bytes_bucket) + action: drop +# cert-exporter +- job_name: eks-sample-prometheus/cert-exporter-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: endpoints + namespaces: + names: + - kube-system + api_server: https://master.eks-sample:443 + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: true + relabel_configs: + - source_labels: [__address__] + target_label: instance + - source_labels: [__meta_kubernetes_service_label_app] + regex: cert-exporter + action: keep + - target_label: __address__ + replacement: master.eks-sample:443 + - source_labels: [__meta_kubernetes_pod_name] + regex: (cert-exporter.*) + target_label: __metrics_path__ + replacement: /api/v1/namespaces/kube-system/pods/${1}:9005/proxy/metrics + - source_labels: [__meta_kubernetes_service_label_app] + target_label: app + - source_labels: [__meta_kubernetes_service_annotationpresent_giantswarm_io_monitoring, __meta_kubernetes_service_labelpresent_giantswarm_io_monitoring] + regex: .*(true).* + action: drop + # Add namespace label. + - source_labels: [__meta_kubernetes_namespace] + target_label: namespace + # Add pod label. + - source_labels: [__meta_kubernetes_pod_name] + target_label: pod + # Add container label. + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: container + # Add node label. + - source_labels: [__meta_kubernetes_pod_node_name] + target_label: node + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: azure + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo +- job_name: eks-sample-prometheus/workload-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: endpoints + api_server: https://master.eks-sample:443 + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: true + relabel_configs: + - source_labels: [__meta_kubernetes_service_annotationpresent_giantswarm_io_monitoring, __meta_kubernetes_service_labelpresent_giantswarm_io_monitoring] + regex: .*(true).* + action: keep + # if __meta_kubernetes_service_annotation_giantswarm_io_monitoring_path is present, we use it as the metrics path + - source_labels: [__meta_kubernetes_service_annotation_giantswarm_io_monitoring_path] + action: replace + target_label: __metrics_path__ + regex: (.+) + # if __meta_kubernetes_service_annotation_giantswarm_io_monitoring_port, we use it as the metrics port + - source_labels: [__address__, __meta_kubernetes_service_annotation_giantswarm_io_monitoring_port] + action: replace + target_label: __address__ + regex: ([^:]+):(\d+);(\d+) + replacement: $1:$3 + # if the protocol is empty, we set it to http by default, this allows to override the protocol for services using https like prometheus operator + - source_labels: [__address__, __meta_kubernetes_service_annotation_giantswarm_io_monitoring_protocol] + action: replace + target_label: __meta_kubernetes_service_annotation_giantswarm_io_monitoring_protocol + regex: (.*); + replacement: "http" + - source_labels: [__meta_kubernetes_pod_ip, __address__] + regex: (.*);([^:]+):(\d+) + replacement: $1:$3 + target_label: instance + - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_annotation_giantswarm_io_monitoring_protocol, __meta_kubernetes_pod_name, __address__, __metrics_path__] + regex: (.*);(.*);(.*);(.+:)(\d+);(.*) + target_label: __metrics_path__ + replacement: /api/v1/namespaces/${1}/pods/${2}:${3}:${5}/proxy${6} + action: replace + - regex: (.*) + target_label: __address__ + replacement: master.eks-sample:443 + action: replace + - source_labels: [__meta_kubernetes_service_name] + regex: (.*) + target_label: app + action: replace + - source_labels: [__meta_kubernetes_service_annotation_giantswarm_io_monitoring_app_label] + regex: (.+) + target_label: app + action: replace + # Add namespace label. + - source_labels: [__meta_kubernetes_namespace] + target_label: namespace + # Add pod label. + - source_labels: [__meta_kubernetes_pod_name] + target_label: pod + # Add container label. + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: container + # Add node label. + - source_labels: [__meta_kubernetes_pod_node_name] + target_label: node + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: azure + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo + metric_relabel_configs: + - source_labels: [container] + regex: prometheus-operator-app + action: drop + - source_labels: [app] + regex: coredns + action: drop + - source_labels: [app] + regex: kube-state-metrics + action: drop + # drop unused nginx metrics with the highest cardinality as they increase Prometheus memory usage + - source_labels: [__name__] + regex: nginx_ingress_controller_(bytes_sent_bucket|request_size_bucket|response_duration_seconds_bucket|response_size_bucket|request_duration_seconds_count|connect_duration_seconds_bucket|header_duration_seconds_bucket|bytes_sent_count|request_duration_seconds_sum|bytes_sent_sum|request_size_count|response_size_count|response_duration_seconds_sum|response_duration_seconds_count|ingress_upstream_latency_seconds|ingress_upstream_latency_seconds_sum|ingress_upstream_latency_seconds_count) + action: drop + # drop unused kong metrics with the highest cardinality as they increase Prometheus memory usage + - source_labels: [__name__] + regex: kong_(upstream_target_health|latency_bucket|latency_count|latency_sum) + action: drop + # drop unused kube-state-metrics metrics with the highest cardinality as they increase Prometheus memory usage + - source_labels: [__name__] + regex: kube_(.+_annotations|secret_type|pod_status_qos_class|pod_tolerations|pod_status_scheduled|replicaset_metadata_generation|replicaset_status_observed_generation|replicaset_annotations|replicaset_status_fully_labeled_replicas|.+_metadata_resource_version) + action: drop + # drop unused promtail/loki metrics + - source_labels: [__name__] + regex: promtail_request_duration_seconds_bucket|loki_request_duration_seconds_bucket + action: drop + # drop unused rest client metrics + - source_labels: [__name__] + regex: rest_client_(rate_limiter_duration_seconds_bucket|request_size_bytes_bucket|response_size_bytes_bucket) + action: drop + # drop image_id label from kube-state-metrics + - source_labels: [app,image_id] + separator: ; + regex: kube-state-metrics;(.+) + replacement: "" + action: replace + target_label: image_id + - source_labels: [app,deployment] + separator: ; + regex: kube-state-metrics;(.+) + target_label: workload_type + replacement: deployment + action: replace + - source_labels: [app,daemonset] + separator: ; + regex: kube-state-metrics;(.+) + target_label: workload_type + replacement: daemonset + action: replace + - source_labels: [app,statefulset] + separator: ; + regex: kube-state-metrics;(.+) + target_label: workload_type + replacement: statefulset + action: replace + - source_labels: [app,deployment] + separator: ; + regex: kube-state-metrics;(.+) + target_label: workload_name + replacement: ${1} + action: replace + - source_labels: [app,daemonset] + separator: ; + regex: kube-state-metrics;(.+) + target_label: workload_name + replacement: ${1} + action: replace + - source_labels: [app,statefulset] + separator: ; + regex: kube-state-metrics;(.+) + target_label: workload_name + replacement: ${1} + action: replace + - source_labels: [app,label_topology_kubernetes_io_region] + separator: ; + regex: kube-state-metrics;(.+) + target_label: region + replacement: ${1} + action: replace + - source_labels: [app,label_topology_kubernetes_io_zone] + separator: ; + regex: kube-state-metrics;(.+) + target_label: zone + replacement: ${1} + action: replace + - action: labeldrop + regex: label_topology_kubernetes_io_region|label_topology_kubernetes_io_zone + # Override with label for AWS clusters if exists. + - source_labels: [app,label_giantswarm_io_machine_deployment] + regex: kube-state-metrics;(.+) + target_label: nodepool + replacement: ${1} + action: replace + # Override with label for Azure clusters if exists. + - source_labels: [app,label_giantswarm_io_machine_pool] + regex: kube-state-metrics;(.+) + target_label: nodepool + replacement: ${1} + action: replace + - action: labeldrop + regex: label_giantswarm_io_machine_pool|label_giantswarm_io_machine_deployment +# prometheus +- job_name: eks-sample-prometheus/prometheus-eks-sample/0 + honor_labels: true + scheme: http + metrics_path: /eks-sample/metrics + static_configs: + - targets: ['localhost:9090'] + relabel_configs: + - replacement: prometheus + target_label: app + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: azure + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo diff --git a/service/controller/resource/monitoring/scrapeconfigs/test/capa/case-6-cluster-api-eks.golden b/service/controller/resource/monitoring/scrapeconfigs/test/capa/case-6-cluster-api-eks.golden new file mode 100644 index 000000000..571aa6899 --- /dev/null +++ b/service/controller/resource/monitoring/scrapeconfigs/test/capa/case-6-cluster-api-eks.golden @@ -0,0 +1,444 @@ + +# calico-node +- job_name: eks-sample-prometheus/calico-node-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: pod + api_server: https://master.eks-sample:443 + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + relabel_configs: + - source_labels: [__address__] + replacement: ${1}:9091 + target_label: instance + - target_label: __address__ + replacement: master.eks-sample:443 + - source_labels: [__meta_kubernetes_pod_name] + regex: (calico-node.*) + target_label: __metrics_path__ + replacement: /api/v1/namespaces/kube-system/pods/${1}:9091/proxy/metrics + - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_pod_name] + regex: kube-system;calico-node.* + action: keep + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: app + # Add namespace label. + - source_labels: [__meta_kubernetes_namespace] + target_label: namespace + # Add pod label. + - source_labels: [__meta_kubernetes_pod_name] + target_label: pod + # Add container label. + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: container + # Add node label. + - source_labels: [__meta_kubernetes_pod_node_name] + target_label: node + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: capa + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo +# kube-proxy +- job_name: eks-sample-prometheus/kube-proxy-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: pod + namespaces: + names: + - kube-system + api_server: https://master.eks-sample:443 + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: true + relabel_configs: + - source_labels: [__address__] + replacement: $1:10249 + target_label: instance + - source_labels: [__meta_kubernetes_pod_name] + regex: (kube-proxy.*) + action: keep + - target_label: __address__ + replacement: master.eks-sample:443 + - source_labels: [__meta_kubernetes_pod_name] + regex: (kube-proxy.*) + target_label: __metrics_path__ + replacement: /api/v1/namespaces/kube-system/pods/${1}:10249/proxy/metrics + - target_label: app + replacement: kube-proxy + # Add namespace label. + - source_labels: [__meta_kubernetes_namespace] + target_label: namespace + # Add pod label. + - source_labels: [__meta_kubernetes_pod_name] + target_label: pod + # Add container label. + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: container + # Add node label. + - source_labels: [__meta_kubernetes_pod_node_name] + target_label: node + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: capa + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo + metric_relabel_configs: + # drop unused rest client metrics + - source_labels: [__name__] + regex: rest_client_(rate_limiter_duration_seconds_bucket|request_size_bytes_bucket|response_size_bytes_bucket) + action: drop +# cert-exporter +- job_name: eks-sample-prometheus/cert-exporter-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: endpoints + namespaces: + names: + - kube-system + api_server: https://master.eks-sample:443 + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: true + relabel_configs: + - source_labels: [__address__] + target_label: instance + - source_labels: [__meta_kubernetes_service_label_app] + regex: cert-exporter + action: keep + - target_label: __address__ + replacement: master.eks-sample:443 + - source_labels: [__meta_kubernetes_pod_name] + regex: (cert-exporter.*) + target_label: __metrics_path__ + replacement: /api/v1/namespaces/kube-system/pods/${1}:9005/proxy/metrics + - source_labels: [__meta_kubernetes_service_label_app] + target_label: app + - source_labels: [__meta_kubernetes_service_annotationpresent_giantswarm_io_monitoring, __meta_kubernetes_service_labelpresent_giantswarm_io_monitoring] + regex: .*(true).* + action: drop + # Add namespace label. + - source_labels: [__meta_kubernetes_namespace] + target_label: namespace + # Add pod label. + - source_labels: [__meta_kubernetes_pod_name] + target_label: pod + # Add container label. + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: container + # Add node label. + - source_labels: [__meta_kubernetes_pod_node_name] + target_label: node + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: capa + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo +- job_name: eks-sample-prometheus/workload-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: endpoints + api_server: https://master.eks-sample:443 + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: true + relabel_configs: + - source_labels: [__meta_kubernetes_service_annotationpresent_giantswarm_io_monitoring, __meta_kubernetes_service_labelpresent_giantswarm_io_monitoring] + regex: .*(true).* + action: keep + # if __meta_kubernetes_service_annotation_giantswarm_io_monitoring_path is present, we use it as the metrics path + - source_labels: [__meta_kubernetes_service_annotation_giantswarm_io_monitoring_path] + action: replace + target_label: __metrics_path__ + regex: (.+) + # if __meta_kubernetes_service_annotation_giantswarm_io_monitoring_port, we use it as the metrics port + - source_labels: [__address__, __meta_kubernetes_service_annotation_giantswarm_io_monitoring_port] + action: replace + target_label: __address__ + regex: ([^:]+):(\d+);(\d+) + replacement: $1:$3 + # if the protocol is empty, we set it to http by default, this allows to override the protocol for services using https like prometheus operator + - source_labels: [__address__, __meta_kubernetes_service_annotation_giantswarm_io_monitoring_protocol] + action: replace + target_label: __meta_kubernetes_service_annotation_giantswarm_io_monitoring_protocol + regex: (.*); + replacement: "http" + - source_labels: [__meta_kubernetes_pod_ip, __address__] + regex: (.*);([^:]+):(\d+) + replacement: $1:$3 + target_label: instance + - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_annotation_giantswarm_io_monitoring_protocol, __meta_kubernetes_pod_name, __address__, __metrics_path__] + regex: (.*);(.*);(.*);(.+:)(\d+);(.*) + target_label: __metrics_path__ + replacement: /api/v1/namespaces/${1}/pods/${2}:${3}:${5}/proxy${6} + action: replace + - regex: (.*) + target_label: __address__ + replacement: master.eks-sample:443 + action: replace + - source_labels: [__meta_kubernetes_service_name] + regex: (.*) + target_label: app + action: replace + - source_labels: [__meta_kubernetes_service_annotation_giantswarm_io_monitoring_app_label] + regex: (.+) + target_label: app + action: replace + # Add namespace label. + - source_labels: [__meta_kubernetes_namespace] + target_label: namespace + # Add pod label. + - source_labels: [__meta_kubernetes_pod_name] + target_label: pod + # Add container label. + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: container + # Add node label. + - source_labels: [__meta_kubernetes_pod_node_name] + target_label: node + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: capa + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo + metric_relabel_configs: + - source_labels: [container] + regex: prometheus-operator-app + action: drop + - source_labels: [app] + regex: coredns + action: drop + - source_labels: [app] + regex: kube-state-metrics + action: drop + # drop unused nginx metrics with the highest cardinality as they increase Prometheus memory usage + - source_labels: [__name__] + regex: nginx_ingress_controller_(bytes_sent_bucket|request_size_bucket|response_duration_seconds_bucket|response_size_bucket|request_duration_seconds_count|connect_duration_seconds_bucket|header_duration_seconds_bucket|bytes_sent_count|request_duration_seconds_sum|bytes_sent_sum|request_size_count|response_size_count|response_duration_seconds_sum|response_duration_seconds_count|ingress_upstream_latency_seconds|ingress_upstream_latency_seconds_sum|ingress_upstream_latency_seconds_count) + action: drop + # drop unused kong metrics with the highest cardinality as they increase Prometheus memory usage + - source_labels: [__name__] + regex: kong_(upstream_target_health|latency_bucket|latency_count|latency_sum) + action: drop + # drop unused kube-state-metrics metrics with the highest cardinality as they increase Prometheus memory usage + - source_labels: [__name__] + regex: kube_(.+_annotations|secret_type|pod_status_qos_class|pod_tolerations|pod_status_scheduled|replicaset_metadata_generation|replicaset_status_observed_generation|replicaset_annotations|replicaset_status_fully_labeled_replicas|.+_metadata_resource_version) + action: drop + # drop unused promtail/loki metrics + - source_labels: [__name__] + regex: promtail_request_duration_seconds_bucket|loki_request_duration_seconds_bucket + action: drop + # drop unused rest client metrics + - source_labels: [__name__] + regex: rest_client_(rate_limiter_duration_seconds_bucket|request_size_bytes_bucket|response_size_bytes_bucket) + action: drop + # drop image_id label from kube-state-metrics + - source_labels: [app,image_id] + separator: ; + regex: kube-state-metrics;(.+) + replacement: "" + action: replace + target_label: image_id + - source_labels: [app,deployment] + separator: ; + regex: kube-state-metrics;(.+) + target_label: workload_type + replacement: deployment + action: replace + - source_labels: [app,daemonset] + separator: ; + regex: kube-state-metrics;(.+) + target_label: workload_type + replacement: daemonset + action: replace + - source_labels: [app,statefulset] + separator: ; + regex: kube-state-metrics;(.+) + target_label: workload_type + replacement: statefulset + action: replace + - source_labels: [app,deployment] + separator: ; + regex: kube-state-metrics;(.+) + target_label: workload_name + replacement: ${1} + action: replace + - source_labels: [app,daemonset] + separator: ; + regex: kube-state-metrics;(.+) + target_label: workload_name + replacement: ${1} + action: replace + - source_labels: [app,statefulset] + separator: ; + regex: kube-state-metrics;(.+) + target_label: workload_name + replacement: ${1} + action: replace + - source_labels: [app,label_topology_kubernetes_io_region] + separator: ; + regex: kube-state-metrics;(.+) + target_label: region + replacement: ${1} + action: replace + - source_labels: [app,label_topology_kubernetes_io_zone] + separator: ; + regex: kube-state-metrics;(.+) + target_label: zone + replacement: ${1} + action: replace + - action: labeldrop + regex: label_topology_kubernetes_io_region|label_topology_kubernetes_io_zone + # Override with label for AWS clusters if exists. + - source_labels: [app,label_giantswarm_io_machine_deployment] + regex: kube-state-metrics;(.+) + target_label: nodepool + replacement: ${1} + action: replace + # Override with label for Azure clusters if exists. + - source_labels: [app,label_giantswarm_io_machine_pool] + regex: kube-state-metrics;(.+) + target_label: nodepool + replacement: ${1} + action: replace + - action: labeldrop + regex: label_giantswarm_io_machine_pool|label_giantswarm_io_machine_deployment +# prometheus +- job_name: eks-sample-prometheus/prometheus-eks-sample/0 + honor_labels: true + scheme: http + metrics_path: /eks-sample/metrics + static_configs: + - targets: ['localhost:9090'] + relabel_configs: + - replacement: prometheus + target_label: app + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: capa + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo diff --git a/service/controller/resource/monitoring/scrapeconfigs/test/gcp/case-6-cluster-api-eks.golden b/service/controller/resource/monitoring/scrapeconfigs/test/gcp/case-6-cluster-api-eks.golden new file mode 100644 index 000000000..b6a2fd2c8 --- /dev/null +++ b/service/controller/resource/monitoring/scrapeconfigs/test/gcp/case-6-cluster-api-eks.golden @@ -0,0 +1,444 @@ + +# calico-node +- job_name: eks-sample-prometheus/calico-node-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: pod + api_server: https://master.eks-sample:443 + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + relabel_configs: + - source_labels: [__address__] + replacement: ${1}:9091 + target_label: instance + - target_label: __address__ + replacement: master.eks-sample:443 + - source_labels: [__meta_kubernetes_pod_name] + regex: (calico-node.*) + target_label: __metrics_path__ + replacement: /api/v1/namespaces/kube-system/pods/${1}:9091/proxy/metrics + - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_pod_name] + regex: kube-system;calico-node.* + action: keep + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: app + # Add namespace label. + - source_labels: [__meta_kubernetes_namespace] + target_label: namespace + # Add pod label. + - source_labels: [__meta_kubernetes_pod_name] + target_label: pod + # Add container label. + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: container + # Add node label. + - source_labels: [__meta_kubernetes_pod_node_name] + target_label: node + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: gcp + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo +# kube-proxy +- job_name: eks-sample-prometheus/kube-proxy-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: pod + namespaces: + names: + - kube-system + api_server: https://master.eks-sample:443 + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: true + relabel_configs: + - source_labels: [__address__] + replacement: $1:10249 + target_label: instance + - source_labels: [__meta_kubernetes_pod_name] + regex: (kube-proxy.*) + action: keep + - target_label: __address__ + replacement: master.eks-sample:443 + - source_labels: [__meta_kubernetes_pod_name] + regex: (kube-proxy.*) + target_label: __metrics_path__ + replacement: /api/v1/namespaces/kube-system/pods/${1}:10249/proxy/metrics + - target_label: app + replacement: kube-proxy + # Add namespace label. + - source_labels: [__meta_kubernetes_namespace] + target_label: namespace + # Add pod label. + - source_labels: [__meta_kubernetes_pod_name] + target_label: pod + # Add container label. + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: container + # Add node label. + - source_labels: [__meta_kubernetes_pod_node_name] + target_label: node + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: gcp + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo + metric_relabel_configs: + # drop unused rest client metrics + - source_labels: [__name__] + regex: rest_client_(rate_limiter_duration_seconds_bucket|request_size_bytes_bucket|response_size_bytes_bucket) + action: drop +# cert-exporter +- job_name: eks-sample-prometheus/cert-exporter-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: endpoints + namespaces: + names: + - kube-system + api_server: https://master.eks-sample:443 + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: true + relabel_configs: + - source_labels: [__address__] + target_label: instance + - source_labels: [__meta_kubernetes_service_label_app] + regex: cert-exporter + action: keep + - target_label: __address__ + replacement: master.eks-sample:443 + - source_labels: [__meta_kubernetes_pod_name] + regex: (cert-exporter.*) + target_label: __metrics_path__ + replacement: /api/v1/namespaces/kube-system/pods/${1}:9005/proxy/metrics + - source_labels: [__meta_kubernetes_service_label_app] + target_label: app + - source_labels: [__meta_kubernetes_service_annotationpresent_giantswarm_io_monitoring, __meta_kubernetes_service_labelpresent_giantswarm_io_monitoring] + regex: .*(true).* + action: drop + # Add namespace label. + - source_labels: [__meta_kubernetes_namespace] + target_label: namespace + # Add pod label. + - source_labels: [__meta_kubernetes_pod_name] + target_label: pod + # Add container label. + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: container + # Add node label. + - source_labels: [__meta_kubernetes_pod_node_name] + target_label: node + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: gcp + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo +- job_name: eks-sample-prometheus/workload-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: endpoints + api_server: https://master.eks-sample:443 + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: true + relabel_configs: + - source_labels: [__meta_kubernetes_service_annotationpresent_giantswarm_io_monitoring, __meta_kubernetes_service_labelpresent_giantswarm_io_monitoring] + regex: .*(true).* + action: keep + # if __meta_kubernetes_service_annotation_giantswarm_io_monitoring_path is present, we use it as the metrics path + - source_labels: [__meta_kubernetes_service_annotation_giantswarm_io_monitoring_path] + action: replace + target_label: __metrics_path__ + regex: (.+) + # if __meta_kubernetes_service_annotation_giantswarm_io_monitoring_port, we use it as the metrics port + - source_labels: [__address__, __meta_kubernetes_service_annotation_giantswarm_io_monitoring_port] + action: replace + target_label: __address__ + regex: ([^:]+):(\d+);(\d+) + replacement: $1:$3 + # if the protocol is empty, we set it to http by default, this allows to override the protocol for services using https like prometheus operator + - source_labels: [__address__, __meta_kubernetes_service_annotation_giantswarm_io_monitoring_protocol] + action: replace + target_label: __meta_kubernetes_service_annotation_giantswarm_io_monitoring_protocol + regex: (.*); + replacement: "http" + - source_labels: [__meta_kubernetes_pod_ip, __address__] + regex: (.*);([^:]+):(\d+) + replacement: $1:$3 + target_label: instance + - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_annotation_giantswarm_io_monitoring_protocol, __meta_kubernetes_pod_name, __address__, __metrics_path__] + regex: (.*);(.*);(.*);(.+:)(\d+);(.*) + target_label: __metrics_path__ + replacement: /api/v1/namespaces/${1}/pods/${2}:${3}:${5}/proxy${6} + action: replace + - regex: (.*) + target_label: __address__ + replacement: master.eks-sample:443 + action: replace + - source_labels: [__meta_kubernetes_service_name] + regex: (.*) + target_label: app + action: replace + - source_labels: [__meta_kubernetes_service_annotation_giantswarm_io_monitoring_app_label] + regex: (.+) + target_label: app + action: replace + # Add namespace label. + - source_labels: [__meta_kubernetes_namespace] + target_label: namespace + # Add pod label. + - source_labels: [__meta_kubernetes_pod_name] + target_label: pod + # Add container label. + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: container + # Add node label. + - source_labels: [__meta_kubernetes_pod_node_name] + target_label: node + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: gcp + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo + metric_relabel_configs: + - source_labels: [container] + regex: prometheus-operator-app + action: drop + - source_labels: [app] + regex: coredns + action: drop + - source_labels: [app] + regex: kube-state-metrics + action: drop + # drop unused nginx metrics with the highest cardinality as they increase Prometheus memory usage + - source_labels: [__name__] + regex: nginx_ingress_controller_(bytes_sent_bucket|request_size_bucket|response_duration_seconds_bucket|response_size_bucket|request_duration_seconds_count|connect_duration_seconds_bucket|header_duration_seconds_bucket|bytes_sent_count|request_duration_seconds_sum|bytes_sent_sum|request_size_count|response_size_count|response_duration_seconds_sum|response_duration_seconds_count|ingress_upstream_latency_seconds|ingress_upstream_latency_seconds_sum|ingress_upstream_latency_seconds_count) + action: drop + # drop unused kong metrics with the highest cardinality as they increase Prometheus memory usage + - source_labels: [__name__] + regex: kong_(upstream_target_health|latency_bucket|latency_count|latency_sum) + action: drop + # drop unused kube-state-metrics metrics with the highest cardinality as they increase Prometheus memory usage + - source_labels: [__name__] + regex: kube_(.+_annotations|secret_type|pod_status_qos_class|pod_tolerations|pod_status_scheduled|replicaset_metadata_generation|replicaset_status_observed_generation|replicaset_annotations|replicaset_status_fully_labeled_replicas|.+_metadata_resource_version) + action: drop + # drop unused promtail/loki metrics + - source_labels: [__name__] + regex: promtail_request_duration_seconds_bucket|loki_request_duration_seconds_bucket + action: drop + # drop unused rest client metrics + - source_labels: [__name__] + regex: rest_client_(rate_limiter_duration_seconds_bucket|request_size_bytes_bucket|response_size_bytes_bucket) + action: drop + # drop image_id label from kube-state-metrics + - source_labels: [app,image_id] + separator: ; + regex: kube-state-metrics;(.+) + replacement: "" + action: replace + target_label: image_id + - source_labels: [app,deployment] + separator: ; + regex: kube-state-metrics;(.+) + target_label: workload_type + replacement: deployment + action: replace + - source_labels: [app,daemonset] + separator: ; + regex: kube-state-metrics;(.+) + target_label: workload_type + replacement: daemonset + action: replace + - source_labels: [app,statefulset] + separator: ; + regex: kube-state-metrics;(.+) + target_label: workload_type + replacement: statefulset + action: replace + - source_labels: [app,deployment] + separator: ; + regex: kube-state-metrics;(.+) + target_label: workload_name + replacement: ${1} + action: replace + - source_labels: [app,daemonset] + separator: ; + regex: kube-state-metrics;(.+) + target_label: workload_name + replacement: ${1} + action: replace + - source_labels: [app,statefulset] + separator: ; + regex: kube-state-metrics;(.+) + target_label: workload_name + replacement: ${1} + action: replace + - source_labels: [app,label_topology_kubernetes_io_region] + separator: ; + regex: kube-state-metrics;(.+) + target_label: region + replacement: ${1} + action: replace + - source_labels: [app,label_topology_kubernetes_io_zone] + separator: ; + regex: kube-state-metrics;(.+) + target_label: zone + replacement: ${1} + action: replace + - action: labeldrop + regex: label_topology_kubernetes_io_region|label_topology_kubernetes_io_zone + # Override with label for AWS clusters if exists. + - source_labels: [app,label_giantswarm_io_machine_deployment] + regex: kube-state-metrics;(.+) + target_label: nodepool + replacement: ${1} + action: replace + # Override with label for Azure clusters if exists. + - source_labels: [app,label_giantswarm_io_machine_pool] + regex: kube-state-metrics;(.+) + target_label: nodepool + replacement: ${1} + action: replace + - action: labeldrop + regex: label_giantswarm_io_machine_pool|label_giantswarm_io_machine_deployment +# prometheus +- job_name: eks-sample-prometheus/prometheus-eks-sample/0 + honor_labels: true + scheme: http + metrics_path: /eks-sample/metrics + static_configs: + - targets: ['localhost:9090'] + relabel_configs: + - replacement: prometheus + target_label: app + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: gcp + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo diff --git a/service/controller/resource/monitoring/scrapeconfigs/test/kvm/case-6-cluster-api-eks.golden b/service/controller/resource/monitoring/scrapeconfigs/test/kvm/case-6-cluster-api-eks.golden new file mode 100644 index 000000000..8fd41662d --- /dev/null +++ b/service/controller/resource/monitoring/scrapeconfigs/test/kvm/case-6-cluster-api-eks.golden @@ -0,0 +1,500 @@ + +# Add scrape configuration for docker +- job_name: eks-sample-prometheus/docker-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: node + api_server: https://master.eks-sample:443 + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + relabel_configs: + - target_label: __address__ + replacement: master.eks-sample:443 + - source_labels: [__meta_kubernetes_node_name] + target_label: __metrics_path__ + replacement: /api/v1/nodes/${1}:9323/proxy/metrics + - target_label: app + replacement: docker + - source_labels: [__meta_kubernetes_node_address_InternalIP] + replacement: ${1}:9323 + target_label: instance + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: kvm + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role + metric_relabel_configs: + - source_labels: [__name__] + regex: (engine_daemon_image_actions_seconds_count|process_virtual_memory_bytes|process_resident_memory_bytes) + action: keep +# calico-node +- job_name: eks-sample-prometheus/calico-node-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: pod + api_server: https://master.eks-sample:443 + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + relabel_configs: + - source_labels: [__address__] + replacement: ${1}:9091 + target_label: instance + - target_label: __address__ + replacement: master.eks-sample:443 + - source_labels: [__meta_kubernetes_pod_name] + regex: (calico-node.*) + target_label: __metrics_path__ + replacement: /api/v1/namespaces/kube-system/pods/${1}:9091/proxy/metrics + - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_pod_name] + regex: kube-system;calico-node.* + action: keep + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: app + # Add namespace label. + - source_labels: [__meta_kubernetes_namespace] + target_label: namespace + # Add pod label. + - source_labels: [__meta_kubernetes_pod_name] + target_label: pod + # Add container label. + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: container + # Add node label. + - source_labels: [__meta_kubernetes_pod_node_name] + target_label: node + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: kvm + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo +# kube-proxy +- job_name: eks-sample-prometheus/kube-proxy-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: pod + namespaces: + names: + - kube-system + api_server: https://master.eks-sample:443 + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: true + relabel_configs: + - source_labels: [__address__] + replacement: $1:10249 + target_label: instance + - source_labels: [__meta_kubernetes_pod_name] + regex: (kube-proxy.*) + action: keep + - target_label: __address__ + replacement: master.eks-sample:443 + - source_labels: [__meta_kubernetes_pod_name] + regex: (kube-proxy.*) + target_label: __metrics_path__ + replacement: /api/v1/namespaces/kube-system/pods/${1}:10249/proxy/metrics + - target_label: app + replacement: kube-proxy + # Add namespace label. + - source_labels: [__meta_kubernetes_namespace] + target_label: namespace + # Add pod label. + - source_labels: [__meta_kubernetes_pod_name] + target_label: pod + # Add container label. + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: container + # Add node label. + - source_labels: [__meta_kubernetes_pod_node_name] + target_label: node + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: kvm + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo + metric_relabel_configs: + # drop unused rest client metrics + - source_labels: [__name__] + regex: rest_client_(rate_limiter_duration_seconds_bucket|request_size_bytes_bucket|response_size_bytes_bucket) + action: drop +# cert-exporter +- job_name: eks-sample-prometheus/cert-exporter-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: endpoints + namespaces: + names: + - kube-system + api_server: https://master.eks-sample:443 + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: true + relabel_configs: + - source_labels: [__address__] + target_label: instance + - source_labels: [__meta_kubernetes_service_label_app] + regex: cert-exporter + action: keep + - target_label: __address__ + replacement: master.eks-sample:443 + - source_labels: [__meta_kubernetes_pod_name] + regex: (cert-exporter.*) + target_label: __metrics_path__ + replacement: /api/v1/namespaces/kube-system/pods/${1}:9005/proxy/metrics + - source_labels: [__meta_kubernetes_service_label_app] + target_label: app + - source_labels: [__meta_kubernetes_service_annotationpresent_giantswarm_io_monitoring, __meta_kubernetes_service_labelpresent_giantswarm_io_monitoring] + regex: .*(true).* + action: drop + # Add namespace label. + - source_labels: [__meta_kubernetes_namespace] + target_label: namespace + # Add pod label. + - source_labels: [__meta_kubernetes_pod_name] + target_label: pod + # Add container label. + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: container + # Add node label. + - source_labels: [__meta_kubernetes_pod_node_name] + target_label: node + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: kvm + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo +- job_name: eks-sample-prometheus/workload-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: endpoints + api_server: https://master.eks-sample:443 + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: true + relabel_configs: + - source_labels: [__meta_kubernetes_service_annotationpresent_giantswarm_io_monitoring, __meta_kubernetes_service_labelpresent_giantswarm_io_monitoring] + regex: .*(true).* + action: keep + # if __meta_kubernetes_service_annotation_giantswarm_io_monitoring_path is present, we use it as the metrics path + - source_labels: [__meta_kubernetes_service_annotation_giantswarm_io_monitoring_path] + action: replace + target_label: __metrics_path__ + regex: (.+) + # if __meta_kubernetes_service_annotation_giantswarm_io_monitoring_port, we use it as the metrics port + - source_labels: [__address__, __meta_kubernetes_service_annotation_giantswarm_io_monitoring_port] + action: replace + target_label: __address__ + regex: ([^:]+):(\d+);(\d+) + replacement: $1:$3 + # if the protocol is empty, we set it to http by default, this allows to override the protocol for services using https like prometheus operator + - source_labels: [__address__, __meta_kubernetes_service_annotation_giantswarm_io_monitoring_protocol] + action: replace + target_label: __meta_kubernetes_service_annotation_giantswarm_io_monitoring_protocol + regex: (.*); + replacement: "http" + - source_labels: [__meta_kubernetes_pod_ip, __address__] + regex: (.*);([^:]+):(\d+) + replacement: $1:$3 + target_label: instance + - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_annotation_giantswarm_io_monitoring_protocol, __meta_kubernetes_pod_name, __address__, __metrics_path__] + regex: (.*);(.*);(.*);(.+:)(\d+);(.*) + target_label: __metrics_path__ + replacement: /api/v1/namespaces/${1}/pods/${2}:${3}:${5}/proxy${6} + action: replace + - regex: (.*) + target_label: __address__ + replacement: master.eks-sample:443 + action: replace + - source_labels: [__meta_kubernetes_service_name] + regex: (.*) + target_label: app + action: replace + - source_labels: [__meta_kubernetes_service_annotation_giantswarm_io_monitoring_app_label] + regex: (.+) + target_label: app + action: replace + # Add namespace label. + - source_labels: [__meta_kubernetes_namespace] + target_label: namespace + # Add pod label. + - source_labels: [__meta_kubernetes_pod_name] + target_label: pod + # Add container label. + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: container + # Add node label. + - source_labels: [__meta_kubernetes_pod_node_name] + target_label: node + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: kvm + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo + metric_relabel_configs: + - source_labels: [container] + regex: prometheus-operator-app + action: drop + - source_labels: [app] + regex: coredns + action: drop + - source_labels: [app] + regex: kube-state-metrics + action: drop + # drop unused nginx metrics with the highest cardinality as they increase Prometheus memory usage + - source_labels: [__name__] + regex: nginx_ingress_controller_(bytes_sent_bucket|request_size_bucket|response_duration_seconds_bucket|response_size_bucket|request_duration_seconds_count|connect_duration_seconds_bucket|header_duration_seconds_bucket|bytes_sent_count|request_duration_seconds_sum|bytes_sent_sum|request_size_count|response_size_count|response_duration_seconds_sum|response_duration_seconds_count|ingress_upstream_latency_seconds|ingress_upstream_latency_seconds_sum|ingress_upstream_latency_seconds_count) + action: drop + # drop unused kong metrics with the highest cardinality as they increase Prometheus memory usage + - source_labels: [__name__] + regex: kong_(upstream_target_health|latency_bucket|latency_count|latency_sum) + action: drop + # drop unused kube-state-metrics metrics with the highest cardinality as they increase Prometheus memory usage + - source_labels: [__name__] + regex: kube_(.+_annotations|secret_type|pod_status_qos_class|pod_tolerations|pod_status_scheduled|replicaset_metadata_generation|replicaset_status_observed_generation|replicaset_annotations|replicaset_status_fully_labeled_replicas|.+_metadata_resource_version) + action: drop + # drop unused promtail/loki metrics + - source_labels: [__name__] + regex: promtail_request_duration_seconds_bucket|loki_request_duration_seconds_bucket + action: drop + # drop unused rest client metrics + - source_labels: [__name__] + regex: rest_client_(rate_limiter_duration_seconds_bucket|request_size_bytes_bucket|response_size_bytes_bucket) + action: drop + # drop image_id label from kube-state-metrics + - source_labels: [app,image_id] + separator: ; + regex: kube-state-metrics;(.+) + replacement: "" + action: replace + target_label: image_id + - source_labels: [app,deployment] + separator: ; + regex: kube-state-metrics;(.+) + target_label: workload_type + replacement: deployment + action: replace + - source_labels: [app,daemonset] + separator: ; + regex: kube-state-metrics;(.+) + target_label: workload_type + replacement: daemonset + action: replace + - source_labels: [app,statefulset] + separator: ; + regex: kube-state-metrics;(.+) + target_label: workload_type + replacement: statefulset + action: replace + - source_labels: [app,deployment] + separator: ; + regex: kube-state-metrics;(.+) + target_label: workload_name + replacement: ${1} + action: replace + - source_labels: [app,daemonset] + separator: ; + regex: kube-state-metrics;(.+) + target_label: workload_name + replacement: ${1} + action: replace + - source_labels: [app,statefulset] + separator: ; + regex: kube-state-metrics;(.+) + target_label: workload_name + replacement: ${1} + action: replace + - source_labels: [app,label_topology_kubernetes_io_region] + separator: ; + regex: kube-state-metrics;(.+) + target_label: region + replacement: ${1} + action: replace + - source_labels: [app,label_topology_kubernetes_io_zone] + separator: ; + regex: kube-state-metrics;(.+) + target_label: zone + replacement: ${1} + action: replace + - action: labeldrop + regex: label_topology_kubernetes_io_region|label_topology_kubernetes_io_zone + # Override with label for AWS clusters if exists. + - source_labels: [app,label_giantswarm_io_machine_deployment] + regex: kube-state-metrics;(.+) + target_label: nodepool + replacement: ${1} + action: replace + # Override with label for Azure clusters if exists. + - source_labels: [app,label_giantswarm_io_machine_pool] + regex: kube-state-metrics;(.+) + target_label: nodepool + replacement: ${1} + action: replace + - action: labeldrop + regex: label_giantswarm_io_machine_pool|label_giantswarm_io_machine_deployment +# prometheus +- job_name: eks-sample-prometheus/prometheus-eks-sample/0 + honor_labels: true + scheme: http + metrics_path: /eks-sample/metrics + static_configs: + - targets: ['localhost:9090'] + relabel_configs: + - replacement: prometheus + target_label: app + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: kvm + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo diff --git a/service/controller/resource/monitoring/scrapeconfigs/test/openstack/case-6-cluster-api-eks.golden b/service/controller/resource/monitoring/scrapeconfigs/test/openstack/case-6-cluster-api-eks.golden new file mode 100644 index 000000000..60b981f67 --- /dev/null +++ b/service/controller/resource/monitoring/scrapeconfigs/test/openstack/case-6-cluster-api-eks.golden @@ -0,0 +1,444 @@ + +# calico-node +- job_name: eks-sample-prometheus/calico-node-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: pod + api_server: https://master.eks-sample:443 + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + relabel_configs: + - source_labels: [__address__] + replacement: ${1}:9091 + target_label: instance + - target_label: __address__ + replacement: master.eks-sample:443 + - source_labels: [__meta_kubernetes_pod_name] + regex: (calico-node.*) + target_label: __metrics_path__ + replacement: /api/v1/namespaces/kube-system/pods/${1}:9091/proxy/metrics + - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_pod_name] + regex: kube-system;calico-node.* + action: keep + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: app + # Add namespace label. + - source_labels: [__meta_kubernetes_namespace] + target_label: namespace + # Add pod label. + - source_labels: [__meta_kubernetes_pod_name] + target_label: pod + # Add container label. + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: container + # Add node label. + - source_labels: [__meta_kubernetes_pod_node_name] + target_label: node + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: openstack + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo +# kube-proxy +- job_name: eks-sample-prometheus/kube-proxy-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: pod + namespaces: + names: + - kube-system + api_server: https://master.eks-sample:443 + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: true + relabel_configs: + - source_labels: [__address__] + replacement: $1:10249 + target_label: instance + - source_labels: [__meta_kubernetes_pod_name] + regex: (kube-proxy.*) + action: keep + - target_label: __address__ + replacement: master.eks-sample:443 + - source_labels: [__meta_kubernetes_pod_name] + regex: (kube-proxy.*) + target_label: __metrics_path__ + replacement: /api/v1/namespaces/kube-system/pods/${1}:10249/proxy/metrics + - target_label: app + replacement: kube-proxy + # Add namespace label. + - source_labels: [__meta_kubernetes_namespace] + target_label: namespace + # Add pod label. + - source_labels: [__meta_kubernetes_pod_name] + target_label: pod + # Add container label. + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: container + # Add node label. + - source_labels: [__meta_kubernetes_pod_node_name] + target_label: node + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: openstack + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo + metric_relabel_configs: + # drop unused rest client metrics + - source_labels: [__name__] + regex: rest_client_(rate_limiter_duration_seconds_bucket|request_size_bytes_bucket|response_size_bytes_bucket) + action: drop +# cert-exporter +- job_name: eks-sample-prometheus/cert-exporter-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: endpoints + namespaces: + names: + - kube-system + api_server: https://master.eks-sample:443 + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: true + relabel_configs: + - source_labels: [__address__] + target_label: instance + - source_labels: [__meta_kubernetes_service_label_app] + regex: cert-exporter + action: keep + - target_label: __address__ + replacement: master.eks-sample:443 + - source_labels: [__meta_kubernetes_pod_name] + regex: (cert-exporter.*) + target_label: __metrics_path__ + replacement: /api/v1/namespaces/kube-system/pods/${1}:9005/proxy/metrics + - source_labels: [__meta_kubernetes_service_label_app] + target_label: app + - source_labels: [__meta_kubernetes_service_annotationpresent_giantswarm_io_monitoring, __meta_kubernetes_service_labelpresent_giantswarm_io_monitoring] + regex: .*(true).* + action: drop + # Add namespace label. + - source_labels: [__meta_kubernetes_namespace] + target_label: namespace + # Add pod label. + - source_labels: [__meta_kubernetes_pod_name] + target_label: pod + # Add container label. + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: container + # Add node label. + - source_labels: [__meta_kubernetes_pod_node_name] + target_label: node + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: openstack + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo +- job_name: eks-sample-prometheus/workload-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: endpoints + api_server: https://master.eks-sample:443 + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: true + relabel_configs: + - source_labels: [__meta_kubernetes_service_annotationpresent_giantswarm_io_monitoring, __meta_kubernetes_service_labelpresent_giantswarm_io_monitoring] + regex: .*(true).* + action: keep + # if __meta_kubernetes_service_annotation_giantswarm_io_monitoring_path is present, we use it as the metrics path + - source_labels: [__meta_kubernetes_service_annotation_giantswarm_io_monitoring_path] + action: replace + target_label: __metrics_path__ + regex: (.+) + # if __meta_kubernetes_service_annotation_giantswarm_io_monitoring_port, we use it as the metrics port + - source_labels: [__address__, __meta_kubernetes_service_annotation_giantswarm_io_monitoring_port] + action: replace + target_label: __address__ + regex: ([^:]+):(\d+);(\d+) + replacement: $1:$3 + # if the protocol is empty, we set it to http by default, this allows to override the protocol for services using https like prometheus operator + - source_labels: [__address__, __meta_kubernetes_service_annotation_giantswarm_io_monitoring_protocol] + action: replace + target_label: __meta_kubernetes_service_annotation_giantswarm_io_monitoring_protocol + regex: (.*); + replacement: "http" + - source_labels: [__meta_kubernetes_pod_ip, __address__] + regex: (.*);([^:]+):(\d+) + replacement: $1:$3 + target_label: instance + - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_annotation_giantswarm_io_monitoring_protocol, __meta_kubernetes_pod_name, __address__, __metrics_path__] + regex: (.*);(.*);(.*);(.+:)(\d+);(.*) + target_label: __metrics_path__ + replacement: /api/v1/namespaces/${1}/pods/${2}:${3}:${5}/proxy${6} + action: replace + - regex: (.*) + target_label: __address__ + replacement: master.eks-sample:443 + action: replace + - source_labels: [__meta_kubernetes_service_name] + regex: (.*) + target_label: app + action: replace + - source_labels: [__meta_kubernetes_service_annotation_giantswarm_io_monitoring_app_label] + regex: (.+) + target_label: app + action: replace + # Add namespace label. + - source_labels: [__meta_kubernetes_namespace] + target_label: namespace + # Add pod label. + - source_labels: [__meta_kubernetes_pod_name] + target_label: pod + # Add container label. + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: container + # Add node label. + - source_labels: [__meta_kubernetes_pod_node_name] + target_label: node + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: openstack + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo + metric_relabel_configs: + - source_labels: [container] + regex: prometheus-operator-app + action: drop + - source_labels: [app] + regex: coredns + action: drop + - source_labels: [app] + regex: kube-state-metrics + action: drop + # drop unused nginx metrics with the highest cardinality as they increase Prometheus memory usage + - source_labels: [__name__] + regex: nginx_ingress_controller_(bytes_sent_bucket|request_size_bucket|response_duration_seconds_bucket|response_size_bucket|request_duration_seconds_count|connect_duration_seconds_bucket|header_duration_seconds_bucket|bytes_sent_count|request_duration_seconds_sum|bytes_sent_sum|request_size_count|response_size_count|response_duration_seconds_sum|response_duration_seconds_count|ingress_upstream_latency_seconds|ingress_upstream_latency_seconds_sum|ingress_upstream_latency_seconds_count) + action: drop + # drop unused kong metrics with the highest cardinality as they increase Prometheus memory usage + - source_labels: [__name__] + regex: kong_(upstream_target_health|latency_bucket|latency_count|latency_sum) + action: drop + # drop unused kube-state-metrics metrics with the highest cardinality as they increase Prometheus memory usage + - source_labels: [__name__] + regex: kube_(.+_annotations|secret_type|pod_status_qos_class|pod_tolerations|pod_status_scheduled|replicaset_metadata_generation|replicaset_status_observed_generation|replicaset_annotations|replicaset_status_fully_labeled_replicas|.+_metadata_resource_version) + action: drop + # drop unused promtail/loki metrics + - source_labels: [__name__] + regex: promtail_request_duration_seconds_bucket|loki_request_duration_seconds_bucket + action: drop + # drop unused rest client metrics + - source_labels: [__name__] + regex: rest_client_(rate_limiter_duration_seconds_bucket|request_size_bytes_bucket|response_size_bytes_bucket) + action: drop + # drop image_id label from kube-state-metrics + - source_labels: [app,image_id] + separator: ; + regex: kube-state-metrics;(.+) + replacement: "" + action: replace + target_label: image_id + - source_labels: [app,deployment] + separator: ; + regex: kube-state-metrics;(.+) + target_label: workload_type + replacement: deployment + action: replace + - source_labels: [app,daemonset] + separator: ; + regex: kube-state-metrics;(.+) + target_label: workload_type + replacement: daemonset + action: replace + - source_labels: [app,statefulset] + separator: ; + regex: kube-state-metrics;(.+) + target_label: workload_type + replacement: statefulset + action: replace + - source_labels: [app,deployment] + separator: ; + regex: kube-state-metrics;(.+) + target_label: workload_name + replacement: ${1} + action: replace + - source_labels: [app,daemonset] + separator: ; + regex: kube-state-metrics;(.+) + target_label: workload_name + replacement: ${1} + action: replace + - source_labels: [app,statefulset] + separator: ; + regex: kube-state-metrics;(.+) + target_label: workload_name + replacement: ${1} + action: replace + - source_labels: [app,label_topology_kubernetes_io_region] + separator: ; + regex: kube-state-metrics;(.+) + target_label: region + replacement: ${1} + action: replace + - source_labels: [app,label_topology_kubernetes_io_zone] + separator: ; + regex: kube-state-metrics;(.+) + target_label: zone + replacement: ${1} + action: replace + - action: labeldrop + regex: label_topology_kubernetes_io_region|label_topology_kubernetes_io_zone + # Override with label for AWS clusters if exists. + - source_labels: [app,label_giantswarm_io_machine_deployment] + regex: kube-state-metrics;(.+) + target_label: nodepool + replacement: ${1} + action: replace + # Override with label for Azure clusters if exists. + - source_labels: [app,label_giantswarm_io_machine_pool] + regex: kube-state-metrics;(.+) + target_label: nodepool + replacement: ${1} + action: replace + - action: labeldrop + regex: label_giantswarm_io_machine_pool|label_giantswarm_io_machine_deployment +# prometheus +- job_name: eks-sample-prometheus/prometheus-eks-sample/0 + honor_labels: true + scheme: http + metrics_path: /eks-sample/metrics + static_configs: + - targets: ['localhost:9090'] + relabel_configs: + - replacement: prometheus + target_label: app + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: openstack + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo diff --git a/service/controller/resource/monitoring/verticalpodautoscaler/test/case-6-cluster-api-eks.golden b/service/controller/resource/monitoring/verticalpodautoscaler/test/case-6-cluster-api-eks.golden new file mode 100644 index 000000000..d9b572e51 --- /dev/null +++ b/service/controller/resource/monitoring/verticalpodautoscaler/test/case-6-cluster-api-eks.golden @@ -0,0 +1,28 @@ +metadata: + creationTimestamp: null + labels: + app.kubernetes.io/instance: eks-sample + app.kubernetes.io/managed-by: prometheus-meta-operator + app.kubernetes.io/name: prometheus + giantswarm.io/cluster: eks-sample + name: prometheus + namespace: eks-sample-prometheus +spec: + resourcePolicy: + containerPolicies: + - containerName: prometheus + controlledValues: RequestsAndLimits + maxAllowed: + cpu: "4" + memory: "13743895347" + minAllowed: + cpu: 100m + memory: "1073741824" + mode: Auto + targetRef: + apiVersion: apps/v1 + kind: StatefulSet + name: prometheus-eks-sample + updatePolicy: + updateMode: Auto +status: {} diff --git a/service/controller/resource/namespace/test/case-6-cluster-api-eks.golden b/service/controller/resource/namespace/test/case-6-cluster-api-eks.golden new file mode 100644 index 000000000..e4dd548e7 --- /dev/null +++ b/service/controller/resource/namespace/test/case-6-cluster-api-eks.golden @@ -0,0 +1,12 @@ +apiVersion: v1 +kind: Namespace +metadata: + creationTimestamp: null + labels: + app.kubernetes.io/instance: eks-sample + app.kubernetes.io/managed-by: prometheus-meta-operator + app.kubernetes.io/name: prometheus + giantswarm.io/cluster: eks-sample + name: eks-sample-prometheus +spec: {} +status: {} diff --git a/service/key/key.go b/service/key/key.go index 743cbed29..5b50d094e 100644 --- a/service/key/key.go +++ b/service/key/key.go @@ -294,6 +294,20 @@ func IsManagementCluster(installation string, obj interface{}) bool { } } +func IsEKSCluster(obj interface{}) bool { + switch v := obj.(type) { + case *v1.Service: + return false + case *capi.Cluster: + if v.Spec.InfrastructureRef.Kind == "AWSManagedCluster" { + return true + } + return false + default: + return false + } +} + func ClusterType(installation string, obj interface{}) string { if IsManagementCluster(installation, obj) { return "management_cluster" From 7eff5c5bf649ae3a744fc92f76b12bf212d341e0 Mon Sep 17 00:00:00 2001 From: Marie Roque Date: Thu, 21 Sep 2023 09:52:41 +0200 Subject: [PATCH 2/4] Refactoring --- .../monitoring/scrapeconfigs/resource.go | 23 +- .../test/aws/case-6-cluster-api-eks.golden | 370 +++++++++++- .../test/azure/case-6-cluster-api-eks.golden | 381 +++++++++++- .../test/capa/case-6-cluster-api-eks.golden | 556 +++++++++++++++++- .../test/gcp/case-6-cluster-api-eks.golden | 556 +++++++++++++++++- .../test/kvm/case-6-cluster-api-eks.golden | 381 +++++++++++- .../openstack/case-6-cluster-api-eks.golden | 556 +++++++++++++++++- service/key/key.go | 13 +- 8 files changed, 2754 insertions(+), 82 deletions(-) diff --git a/service/controller/resource/monitoring/scrapeconfigs/resource.go b/service/controller/resource/monitoring/scrapeconfigs/resource.go index 7346ebc0e..55435daa0 100644 --- a/service/controller/resource/monitoring/scrapeconfigs/resource.go +++ b/service/controller/resource/monitoring/scrapeconfigs/resource.go @@ -308,18 +308,17 @@ func listTargetsToIgnore(ctx context.Context, ctrlClient client.Client, cluster ignoredTargets = append(ignoredTargets, "etcd") } } - - // Vintage WC - if !key.IsCAPIManagementCluster(config.Provider) && !key.IsManagementCluster(config.Installation, cluster) { - // Since 18.0.0 we cannot scrape k8s endpoints externally so we ignore those targets. - release := cluster.GetLabels()["release.giantswarm.io/version"] - version, err := semver.Parse(release) - if err != nil { - return nil, microerror.Mask(err) - } - if version.Major >= 18 { - ignoredTargets = append(ignoredTargets, "kube-controller-manager", "kube-scheduler") - } + } + // Vintage WC + if !key.IsCAPIManagementCluster(config.Provider) && !key.IsManagementCluster(config.Installation, cluster) { + // Since 18.0.0 we cannot scrape k8s endpoints externally so we ignore those targets. + release := cluster.GetLabels()["release.giantswarm.io/version"] + version, err := semver.Parse(release) + if err != nil { + return nil, microerror.Mask(err) + } + if version.Major >= 18 { + ignoredTargets = append(ignoredTargets, "kube-controller-manager", "kube-scheduler") } } return ignoredTargets, nil diff --git a/service/controller/resource/monitoring/scrapeconfigs/test/aws/case-6-cluster-api-eks.golden b/service/controller/resource/monitoring/scrapeconfigs/test/aws/case-6-cluster-api-eks.golden index 48f080cf6..991a8bdfa 100644 --- a/service/controller/resource/monitoring/scrapeconfigs/test/aws/case-6-cluster-api-eks.golden +++ b/service/controller/resource/monitoring/scrapeconfigs/test/aws/case-6-cluster-api-eks.golden @@ -1,4 +1,50 @@ - +- job_name: eks-sample-prometheus/kubernetes-apiserver-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: endpoints + api_server: https://master.eks-sample:443 + bearer_token_file: /etc/prometheus/secrets/cluster-certificates/token + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + insecure_skip_verify: false + bearer_token_file: /etc/prometheus/secrets/cluster-certificates/token + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + insecure_skip_verify: true + relabel_configs: + - source_labels: [__meta_kubernetes_service_label_component] + regex: apiserver + action: keep + - source_labels: [__meta_kubernetes_endpoint_port_name] + regex: https + action: keep + - target_label: app + replacement: kubernetes + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: aws + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role # Add scrape configuration for docker - job_name: eks-sample-prometheus/docker-eks-sample/0 honor_labels: true @@ -53,6 +99,128 @@ - source_labels: [__name__] regex: (engine_daemon_image_actions_seconds_count|process_virtual_memory_bytes|process_resident_memory_bytes) action: keep +# Add kubelet configuration +- job_name: eks-sample-prometheus/kubelet-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: node + api_server: https://master.eks-sample:443 + bearer_token_file: /etc/prometheus/secrets/cluster-certificates/token + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + insecure_skip_verify: false + bearer_token_file: /etc/prometheus/secrets/cluster-certificates/token + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + insecure_skip_verify: true + relabel_configs: + - target_label: app + replacement: kubelet + - source_labels: [__address__] + target_label: instance + - target_label: __address__ + replacement: master.eks-sample:443 + - source_labels: [__meta_kubernetes_node_name] + target_label: __metrics_path__ + replacement: /api/v1/nodes/${1}:10250/proxy/metrics + - source_labels: [__meta_kubernetes_node_name] + target_label: node + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: aws + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role + metric_relabel_configs: + # drop unused rest client metrics + - source_labels: [__name__] + regex: rest_client_(rate_limiter_duration_seconds_bucket|request_size_bytes_bucket|response_size_bytes_bucket) + action: drop + # drop uid label from kubelet + - action: labeldrop + regex: uid +# Add scrape configuration for cadvisor +- job_name: eks-sample-prometheus/cadvisor-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: node + api_server: https://master.eks-sample:443 + bearer_token_file: /etc/prometheus/secrets/cluster-certificates/token + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + insecure_skip_verify: false + bearer_token_file: /etc/prometheus/secrets/cluster-certificates/token + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + insecure_skip_verify: false + relabel_configs: + - source_labels: [__address__] + target_label: instance + - target_label: __address__ + replacement: master.eks-sample:443 + - source_labels: [__meta_kubernetes_node_name] + target_label: __metrics_path__ + replacement: /api/v1/nodes/${1}:10250/proxy/metrics/cadvisor + - target_label: app + replacement: cadvisor + # Add node name. + - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname] + target_label: node + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: aws + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role + metric_relabel_configs: + # drop id and name labels from cAdvisor as they do not provide value but use a lot of RAM + - action: labeldrop + regex: id|name + # dropping explained here https://github.com/giantswarm/giantswarm/issues/26361 + - source_labels: [__name__] + regex: container_(blkio_device_usage_total|network_transmit_errors_total|network_receive_errors_total|tasks_state|memory_failures_total|memory_max_usage_bytes|cpu_load_average_10s|memory_failcnt|cpu_system_seconds_total) + action: drop + - source_labels: [namespace] + regex: (kube-system|giantswarm.*|kong.*|kyverno) + action: keep # calico-node - job_name: eks-sample-prometheus/calico-node-eks-sample/0 honor_labels: true @@ -119,6 +287,63 @@ # Add customer label. - target_label: customer replacement: pmo +# Add etcd configuration +- job_name: eks-sample-prometheus/etcd-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: node + api_server: https://master.eks-sample:443 + bearer_token_file: /etc/prometheus/secrets/cluster-certificates/token + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + insecure_skip_verify: false + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: true + relabel_configs: + - source_labels: [__meta_kubernetes_node_label_role] + regex: control-plane|master + action: keep + # by default use node address + - source_labels: [__address__] + regex: (.*):10250 + target_label: __address__ + replacement: ${1}:2379 + action: replace + - target_label: app + replacement: etcd + - source_labels: [__address__] + target_label: instance + # Add ip label. + - target_label: ip + source_labels: [__meta_kubernetes_node_address_InternalIP] + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: aws + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role # kube-proxy - job_name: eks-sample-prometheus/kube-proxy-eks-sample/0 honor_labels: true @@ -193,6 +418,82 @@ - source_labels: [__name__] regex: rest_client_(rate_limiter_duration_seconds_bucket|request_size_bytes_bucket|response_size_bytes_bucket) action: drop +# coredns +- job_name: eks-sample-prometheus/coredns-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: endpoints + namespaces: + names: + - kube-system + api_server: https://master.eks-sample:443 + bearer_token_file: /etc/prometheus/secrets/cluster-certificates/token + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + insecure_skip_verify: false + bearer_token_file: /etc/prometheus/secrets/cluster-certificates/token + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + insecure_skip_verify: true + relabel_configs: + - source_labels: [__address__] + target_label: instance + - source_labels: [__meta_kubernetes_pod_container_name] + regex: coredns + action: keep + - target_label: __address__ + replacement: master.eks-sample:443 + - source_labels: [__meta_kubernetes_pod_name] + regex: (coredns.*) + target_label: __metrics_path__ + replacement: /api/v1/namespaces/kube-system/pods/${1}:9153/proxy/metrics + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: app + - source_labels: [__meta_kubernetes_service_annotationpresent_giantswarm_io_monitoring, __meta_kubernetes_service_labelpresent_giantswarm_io_monitoring] + regex: .*(true).* + action: drop + # Add namespace label. + - source_labels: [__meta_kubernetes_namespace] + target_label: namespace + # Add pod label. + - source_labels: [__meta_kubernetes_pod_name] + target_label: pod + # Add container label. + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: container + # Add node label. + - source_labels: [__meta_kubernetes_pod_node_name] + target_label: node + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: aws + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo + metric_relabel_configs: + # drop unused coredns metrics with the highest cardinality as they increase Prometheus memory usage + - source_labels: [__name__] + regex: coredns_dns_(response_size_bytes_bucket|request_size_bytes_bucket) + action: drop # cert-exporter - job_name: eks-sample-prometheus/cert-exporter-eks-sample/0 honor_labels: true @@ -264,6 +565,64 @@ # Add customer label. - target_label: customer replacement: pmo +# node-exporter +- job_name: eks-sample-prometheus/node-exporter-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: pod + api_server: https://master.eks-sample:443 + bearer_token_file: /etc/prometheus/secrets/cluster-certificates/token + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + insecure_skip_verify: false + bearer_token_file: /etc/prometheus/secrets/cluster-certificates/token + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + insecure_skip_verify: true + relabel_configs: + - target_label: __address__ + replacement: master.eks-sample:443 + - source_labels: [__meta_kubernetes_pod_name] + regex: (node-exporter.*) + target_label: __metrics_path__ + replacement: /api/v1/namespaces/kube-system/pods/${1}:10300/proxy/metrics + - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_pod_name] + regex: kube-system;node-exporter.* + action: keep + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: app + - source_labels: [__meta_kubernetes_pod_annotationpresent_giantswarm_io_monitoring, __meta_kubernetes_pod_labelpresent_giantswarm_io_monitoring] + regex: .*(true).* + action: drop + - source_labels: [__meta_kubernetes_pod_node_name] + target_label: node + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: aws + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo + metric_relabel_configs: + # drop unused metrics with the highest cardinality as they increase Prometheus memory usage + - source_labels: [__name__] + regex: node_(filesystem_files|filesystem_readonly|nfs_requests_total|network_carrier|network_transmit_colls_total|network_carrier_changes_total|network_transmit_packets_total|network_carrier_down_changes_total|network_carrier_up_changes_total|network_iface_id|xfs_.+|ethtool_.+) + action: drop - job_name: eks-sample-prometheus/workload-eks-sample/0 honor_labels: true scheme: https @@ -357,15 +716,6 @@ - target_label: customer replacement: pmo metric_relabel_configs: - - source_labels: [container] - regex: prometheus-operator-app - action: drop - - source_labels: [app] - regex: coredns - action: drop - - source_labels: [app] - regex: kube-state-metrics - action: drop # drop unused nginx metrics with the highest cardinality as they increase Prometheus memory usage - source_labels: [__name__] regex: nginx_ingress_controller_(bytes_sent_bucket|request_size_bucket|response_duration_seconds_bucket|response_size_bucket|request_duration_seconds_count|connect_duration_seconds_bucket|header_duration_seconds_bucket|bytes_sent_count|request_duration_seconds_sum|bytes_sent_sum|request_size_count|response_size_count|response_duration_seconds_sum|response_duration_seconds_count|ingress_upstream_latency_seconds|ingress_upstream_latency_seconds_sum|ingress_upstream_latency_seconds_count) diff --git a/service/controller/resource/monitoring/scrapeconfigs/test/azure/case-6-cluster-api-eks.golden b/service/controller/resource/monitoring/scrapeconfigs/test/azure/case-6-cluster-api-eks.golden index d573fa28b..04b9d2194 100644 --- a/service/controller/resource/monitoring/scrapeconfigs/test/azure/case-6-cluster-api-eks.golden +++ b/service/controller/resource/monitoring/scrapeconfigs/test/azure/case-6-cluster-api-eks.golden @@ -1,4 +1,52 @@ - +- job_name: eks-sample-prometheus/kubernetes-apiserver-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: endpoints + api_server: https://master.eks-sample:443 + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: true + relabel_configs: + - source_labels: [__meta_kubernetes_service_label_component] + regex: apiserver + action: keep + - source_labels: [__meta_kubernetes_endpoint_port_name] + regex: https + action: keep + - target_label: app + replacement: kubernetes + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: azure + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role # Add scrape configuration for docker - job_name: eks-sample-prometheus/docker-eks-sample/0 honor_labels: true @@ -55,6 +103,132 @@ - source_labels: [__name__] regex: (engine_daemon_image_actions_seconds_count|process_virtual_memory_bytes|process_resident_memory_bytes) action: keep +# Add kubelet configuration +- job_name: eks-sample-prometheus/kubelet-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: node + api_server: https://master.eks-sample:443 + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: true + relabel_configs: + - target_label: app + replacement: kubelet + - source_labels: [__address__] + target_label: instance + - target_label: __address__ + replacement: master.eks-sample:443 + - source_labels: [__meta_kubernetes_node_name] + target_label: __metrics_path__ + replacement: /api/v1/nodes/${1}:10250/proxy/metrics + - source_labels: [__meta_kubernetes_node_name] + target_label: node + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: azure + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role + metric_relabel_configs: + # drop unused rest client metrics + - source_labels: [__name__] + regex: rest_client_(rate_limiter_duration_seconds_bucket|request_size_bytes_bucket|response_size_bytes_bucket) + action: drop + # drop uid label from kubelet + - action: labeldrop + regex: uid +# Add scrape configuration for cadvisor +- job_name: eks-sample-prometheus/cadvisor-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: node + api_server: https://master.eks-sample:443 + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + relabel_configs: + - source_labels: [__address__] + target_label: instance + - target_label: __address__ + replacement: master.eks-sample:443 + - source_labels: [__meta_kubernetes_node_name] + target_label: __metrics_path__ + replacement: /api/v1/nodes/${1}:10250/proxy/metrics/cadvisor + - target_label: app + replacement: cadvisor + # Add node name. + - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname] + target_label: node + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: azure + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role + metric_relabel_configs: + # drop id and name labels from cAdvisor as they do not provide value but use a lot of RAM + - action: labeldrop + regex: id|name + # dropping explained here https://github.com/giantswarm/giantswarm/issues/26361 + - source_labels: [__name__] + regex: container_(blkio_device_usage_total|network_transmit_errors_total|network_receive_errors_total|tasks_state|memory_failures_total|memory_max_usage_bytes|cpu_load_average_10s|memory_failcnt|cpu_system_seconds_total) + action: drop + - source_labels: [namespace] + regex: (kube-system|giantswarm.*|kong.*|kyverno) + action: keep # calico-node - job_name: eks-sample-prometheus/calico-node-eks-sample/0 honor_labels: true @@ -123,6 +297,64 @@ # Add customer label. - target_label: customer replacement: pmo +# Add etcd configuration +- job_name: eks-sample-prometheus/etcd-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: node + api_server: https://master.eks-sample:443 + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: true + relabel_configs: + - source_labels: [__meta_kubernetes_node_label_role] + regex: control-plane|master + action: keep + # by default use node address + - source_labels: [__address__] + regex: (.*):10250 + target_label: __address__ + replacement: ${1}:2379 + action: replace + - target_label: app + replacement: etcd + - source_labels: [__address__] + target_label: instance + # Add ip label. + - target_label: ip + source_labels: [__meta_kubernetes_node_address_InternalIP] + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: azure + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role # kube-proxy - job_name: eks-sample-prometheus/kube-proxy-eks-sample/0 honor_labels: true @@ -199,6 +431,84 @@ - source_labels: [__name__] regex: rest_client_(rate_limiter_duration_seconds_bucket|request_size_bytes_bucket|response_size_bytes_bucket) action: drop +# coredns +- job_name: eks-sample-prometheus/coredns-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: endpoints + namespaces: + names: + - kube-system + api_server: https://master.eks-sample:443 + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: true + relabel_configs: + - source_labels: [__address__] + target_label: instance + - source_labels: [__meta_kubernetes_pod_container_name] + regex: coredns + action: keep + - target_label: __address__ + replacement: master.eks-sample:443 + - source_labels: [__meta_kubernetes_pod_name] + regex: (coredns.*) + target_label: __metrics_path__ + replacement: /api/v1/namespaces/kube-system/pods/${1}:9153/proxy/metrics + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: app + - source_labels: [__meta_kubernetes_service_annotationpresent_giantswarm_io_monitoring, __meta_kubernetes_service_labelpresent_giantswarm_io_monitoring] + regex: .*(true).* + action: drop + # Add namespace label. + - source_labels: [__meta_kubernetes_namespace] + target_label: namespace + # Add pod label. + - source_labels: [__meta_kubernetes_pod_name] + target_label: pod + # Add container label. + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: container + # Add node label. + - source_labels: [__meta_kubernetes_pod_node_name] + target_label: node + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: azure + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo + metric_relabel_configs: + # drop unused coredns metrics with the highest cardinality as they increase Prometheus memory usage + - source_labels: [__name__] + regex: coredns_dns_(response_size_bytes_bucket|request_size_bytes_bucket) + action: drop # cert-exporter - job_name: eks-sample-prometheus/cert-exporter-eks-sample/0 honor_labels: true @@ -272,6 +582,66 @@ # Add customer label. - target_label: customer replacement: pmo +# node-exporter +- job_name: eks-sample-prometheus/node-exporter-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: pod + api_server: https://master.eks-sample:443 + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: true + relabel_configs: + - target_label: __address__ + replacement: master.eks-sample:443 + - source_labels: [__meta_kubernetes_pod_name] + regex: (node-exporter.*) + target_label: __metrics_path__ + replacement: /api/v1/namespaces/kube-system/pods/${1}:10300/proxy/metrics + - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_pod_name] + regex: kube-system;node-exporter.* + action: keep + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: app + - source_labels: [__meta_kubernetes_pod_annotationpresent_giantswarm_io_monitoring, __meta_kubernetes_pod_labelpresent_giantswarm_io_monitoring] + regex: .*(true).* + action: drop + - source_labels: [__meta_kubernetes_pod_node_name] + target_label: node + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: azure + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo + metric_relabel_configs: + # drop unused metrics with the highest cardinality as they increase Prometheus memory usage + - source_labels: [__name__] + regex: node_(filesystem_files|filesystem_readonly|nfs_requests_total|network_carrier|network_transmit_colls_total|network_carrier_changes_total|network_transmit_packets_total|network_carrier_down_changes_total|network_carrier_up_changes_total|network_iface_id|xfs_.+|ethtool_.+) + action: drop - job_name: eks-sample-prometheus/workload-eks-sample/0 honor_labels: true scheme: https @@ -367,15 +737,6 @@ - target_label: customer replacement: pmo metric_relabel_configs: - - source_labels: [container] - regex: prometheus-operator-app - action: drop - - source_labels: [app] - regex: coredns - action: drop - - source_labels: [app] - regex: kube-state-metrics - action: drop # drop unused nginx metrics with the highest cardinality as they increase Prometheus memory usage - source_labels: [__name__] regex: nginx_ingress_controller_(bytes_sent_bucket|request_size_bucket|response_duration_seconds_bucket|response_size_bucket|request_duration_seconds_count|connect_duration_seconds_bucket|header_duration_seconds_bucket|bytes_sent_count|request_duration_seconds_sum|bytes_sent_sum|request_size_count|response_size_count|response_duration_seconds_sum|response_duration_seconds_count|ingress_upstream_latency_seconds|ingress_upstream_latency_seconds_sum|ingress_upstream_latency_seconds_count) diff --git a/service/controller/resource/monitoring/scrapeconfigs/test/capa/case-6-cluster-api-eks.golden b/service/controller/resource/monitoring/scrapeconfigs/test/capa/case-6-cluster-api-eks.golden index 571aa6899..20c7ec7a6 100644 --- a/service/controller/resource/monitoring/scrapeconfigs/test/capa/case-6-cluster-api-eks.golden +++ b/service/controller/resource/monitoring/scrapeconfigs/test/capa/case-6-cluster-api-eks.golden @@ -1,4 +1,178 @@ - +- job_name: eks-sample-prometheus/kubernetes-apiserver-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: endpoints + api_server: https://master.eks-sample:443 + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: true + relabel_configs: + - source_labels: [__meta_kubernetes_service_label_component] + regex: apiserver + action: keep + - source_labels: [__meta_kubernetes_endpoint_port_name] + regex: https + action: keep + - target_label: app + replacement: kubernetes + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: capa + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role +# Add kubelet configuration +- job_name: eks-sample-prometheus/kubelet-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: node + api_server: https://master.eks-sample:443 + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: true + relabel_configs: + - target_label: app + replacement: kubelet + - source_labels: [__address__] + target_label: instance + - target_label: __address__ + replacement: master.eks-sample:443 + - source_labels: [__meta_kubernetes_node_name] + target_label: __metrics_path__ + replacement: /api/v1/nodes/${1}:10250/proxy/metrics + - source_labels: [__meta_kubernetes_node_name] + target_label: node + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: capa + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role + metric_relabel_configs: + # drop unused rest client metrics + - source_labels: [__name__] + regex: rest_client_(rate_limiter_duration_seconds_bucket|request_size_bytes_bucket|response_size_bytes_bucket) + action: drop + # drop uid label from kubelet + - action: labeldrop + regex: uid +# Add scrape configuration for cadvisor +- job_name: eks-sample-prometheus/cadvisor-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: node + api_server: https://master.eks-sample:443 + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + relabel_configs: + - source_labels: [__address__] + target_label: instance + - target_label: __address__ + replacement: master.eks-sample:443 + - source_labels: [__meta_kubernetes_node_name] + target_label: __metrics_path__ + replacement: /api/v1/nodes/${1}:10250/proxy/metrics/cadvisor + - target_label: app + replacement: cadvisor + # Add node name. + - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname] + target_label: node + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: capa + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role + metric_relabel_configs: + # drop id and name labels from cAdvisor as they do not provide value but use a lot of RAM + - action: labeldrop + regex: id|name + # dropping explained here https://github.com/giantswarm/giantswarm/issues/26361 + - source_labels: [__name__] + regex: container_(blkio_device_usage_total|network_transmit_errors_total|network_receive_errors_total|tasks_state|memory_failures_total|memory_max_usage_bytes|cpu_load_average_10s|memory_failcnt|cpu_system_seconds_total) + action: drop + - source_labels: [namespace] + regex: (kube-system|giantswarm.*|kong.*|kyverno) + action: keep # calico-node - job_name: eks-sample-prometheus/calico-node-eks-sample/0 honor_labels: true @@ -67,6 +241,239 @@ # Add customer label. - target_label: customer replacement: pmo +# Add etcd configuration +- job_name: eks-sample-prometheus/etcd-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: pod + namespaces: + names: + - kube-system + api_server: https://master.eks-sample:443 + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: true + relabel_configs: + - source_labels: [__meta_kubernetes_pod_container_name] + regex: (etcd) + action: keep + - target_label: __address__ + replacement: master.eks-sample:443 + - source_labels: [__meta_kubernetes_pod_name] + target_label: __metrics_path__ + replacement: /api/v1/namespaces/kube-system/pods/${1}:2381/proxy/metrics + action: replace + - source_labels: [ __meta_kubernetes_pod_name ] + target_label: pod_name + - target_label: app + replacement: etcd + - source_labels: [__address__] + target_label: instance + # Add ip label. + - target_label: ip + source_labels: [__meta_kubernetes_node_address_InternalIP] + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: capa + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role +# kube-controller-manager +- job_name: eks-sample-prometheus/kubernetes-controller-manager-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: pod + namespaces: + names: + - kube-system + api_server: https://master.eks-sample:443 + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: true + relabel_configs: + - source_labels: [__address__] + replacement: 10257 + target_label: __tmp_port + - source_labels: [__meta_kubernetes_pod_annotationpresent_giantswarm_io_monitoring_port,__meta_kubernetes_pod_annotation_giantswarm_io_monitoring_port] + action: replace + regex: true;(\d+) + replacement: $1 + target_label: __tmp_port + - source_labels: [__address__, __tmp_port] + target_label: instance + regex: (.+);(.+) + replacement: $1:$2 + - source_labels: [__meta_kubernetes_pod_container_name] + regex: (k8s-controller-manager|kube-controller-manager) + action: keep + - target_label: __address__ + replacement: master.eks-sample:443 + - source_labels: [__meta_kubernetes_pod_name, __tmp_port] + target_label: __metrics_path__ + regex: (.+);(\d+) + replacement: /api/v1/namespaces/kube-system/pods/https:${1}:${2}/proxy/metrics + - target_label: app + replacement: kube-controller-manager + # Add namespace label. + - source_labels: [__meta_kubernetes_namespace] + target_label: namespace + # Add pod label. + - source_labels: [__meta_kubernetes_pod_name] + target_label: pod + # Add container label. + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: container + # Add node label. + - source_labels: [__meta_kubernetes_pod_node_name] + target_label: node + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: capa + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo + metric_relabel_configs: + # drop unused rest client metrics + - source_labels: [__name__] + regex: rest_client_(rate_limiter_duration_seconds_bucket|request_size_bytes_bucket|response_size_bytes_bucket) + action: drop +# kube-scheduler +- job_name: eks-sample-prometheus/kubernetes-scheduler-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: pod + namespaces: + names: + - kube-system + api_server: https://master.eks-sample:443 + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: true + relabel_configs: + - source_labels: [__address__] + replacement: 10259 + target_label: __tmp_port + - source_labels: [__meta_kubernetes_pod_annotationpresent_giantswarm_io_monitoring_port,__meta_kubernetes_pod_annotation_giantswarm_io_monitoring_port] + action: replace + regex: true;(\d+) + replacement: $1 + target_label: __tmp_port + - source_labels: [__address__, __tmp_port] + target_label: instance + regex: (.+);(.+) + replacement: $1:$2 + - source_labels: [__meta_kubernetes_pod_container_name] + regex: (k8s-scheduler|kube-scheduler) + action: keep + - target_label: __address__ + replacement: master.eks-sample:443 + - source_labels: [__meta_kubernetes_pod_name, __tmp_port] + target_label: __metrics_path__ + regex: (.+);(\d+) + replacement: /api/v1/namespaces/kube-system/pods/https:${1}:${2}/proxy/metrics + - target_label: app + replacement: kube-scheduler + # Add namespace label. + - source_labels: [__meta_kubernetes_namespace] + target_label: namespace + # Add pod label. + - source_labels: [__meta_kubernetes_pod_name] + target_label: pod + # Add container label. + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: container + # Add node label. + - source_labels: [__meta_kubernetes_pod_node_name] + target_label: node + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: capa + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo + metric_relabel_configs: + # drop unused rest client metrics + - source_labels: [__name__] + regex: rest_client_(rate_limiter_duration_seconds_bucket|request_size_bytes_bucket|response_size_bytes_bucket) + action: drop # kube-proxy - job_name: eks-sample-prometheus/kube-proxy-eks-sample/0 honor_labels: true @@ -143,6 +550,84 @@ - source_labels: [__name__] regex: rest_client_(rate_limiter_duration_seconds_bucket|request_size_bytes_bucket|response_size_bytes_bucket) action: drop +# coredns +- job_name: eks-sample-prometheus/coredns-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: endpoints + namespaces: + names: + - kube-system + api_server: https://master.eks-sample:443 + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: true + relabel_configs: + - source_labels: [__address__] + target_label: instance + - source_labels: [__meta_kubernetes_pod_container_name] + regex: coredns + action: keep + - target_label: __address__ + replacement: master.eks-sample:443 + - source_labels: [__meta_kubernetes_pod_name] + regex: (coredns.*) + target_label: __metrics_path__ + replacement: /api/v1/namespaces/kube-system/pods/${1}:9153/proxy/metrics + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: app + - source_labels: [__meta_kubernetes_service_annotationpresent_giantswarm_io_monitoring, __meta_kubernetes_service_labelpresent_giantswarm_io_monitoring] + regex: .*(true).* + action: drop + # Add namespace label. + - source_labels: [__meta_kubernetes_namespace] + target_label: namespace + # Add pod label. + - source_labels: [__meta_kubernetes_pod_name] + target_label: pod + # Add container label. + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: container + # Add node label. + - source_labels: [__meta_kubernetes_pod_node_name] + target_label: node + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: capa + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo + metric_relabel_configs: + # drop unused coredns metrics with the highest cardinality as they increase Prometheus memory usage + - source_labels: [__name__] + regex: coredns_dns_(response_size_bytes_bucket|request_size_bytes_bucket) + action: drop # cert-exporter - job_name: eks-sample-prometheus/cert-exporter-eks-sample/0 honor_labels: true @@ -216,6 +701,66 @@ # Add customer label. - target_label: customer replacement: pmo +# node-exporter +- job_name: eks-sample-prometheus/node-exporter-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: pod + api_server: https://master.eks-sample:443 + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: true + relabel_configs: + - target_label: __address__ + replacement: master.eks-sample:443 + - source_labels: [__meta_kubernetes_pod_name] + regex: (node-exporter.*) + target_label: __metrics_path__ + replacement: /api/v1/namespaces/kube-system/pods/${1}:10300/proxy/metrics + - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_pod_name] + regex: kube-system;node-exporter.* + action: keep + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: app + - source_labels: [__meta_kubernetes_pod_annotationpresent_giantswarm_io_monitoring, __meta_kubernetes_pod_labelpresent_giantswarm_io_monitoring] + regex: .*(true).* + action: drop + - source_labels: [__meta_kubernetes_pod_node_name] + target_label: node + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: capa + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo + metric_relabel_configs: + # drop unused metrics with the highest cardinality as they increase Prometheus memory usage + - source_labels: [__name__] + regex: node_(filesystem_files|filesystem_readonly|nfs_requests_total|network_carrier|network_transmit_colls_total|network_carrier_changes_total|network_transmit_packets_total|network_carrier_down_changes_total|network_carrier_up_changes_total|network_iface_id|xfs_.+|ethtool_.+) + action: drop - job_name: eks-sample-prometheus/workload-eks-sample/0 honor_labels: true scheme: https @@ -311,15 +856,6 @@ - target_label: customer replacement: pmo metric_relabel_configs: - - source_labels: [container] - regex: prometheus-operator-app - action: drop - - source_labels: [app] - regex: coredns - action: drop - - source_labels: [app] - regex: kube-state-metrics - action: drop # drop unused nginx metrics with the highest cardinality as they increase Prometheus memory usage - source_labels: [__name__] regex: nginx_ingress_controller_(bytes_sent_bucket|request_size_bucket|response_duration_seconds_bucket|response_size_bucket|request_duration_seconds_count|connect_duration_seconds_bucket|header_duration_seconds_bucket|bytes_sent_count|request_duration_seconds_sum|bytes_sent_sum|request_size_count|response_size_count|response_duration_seconds_sum|response_duration_seconds_count|ingress_upstream_latency_seconds|ingress_upstream_latency_seconds_sum|ingress_upstream_latency_seconds_count) diff --git a/service/controller/resource/monitoring/scrapeconfigs/test/gcp/case-6-cluster-api-eks.golden b/service/controller/resource/monitoring/scrapeconfigs/test/gcp/case-6-cluster-api-eks.golden index b6a2fd2c8..812bb2adb 100644 --- a/service/controller/resource/monitoring/scrapeconfigs/test/gcp/case-6-cluster-api-eks.golden +++ b/service/controller/resource/monitoring/scrapeconfigs/test/gcp/case-6-cluster-api-eks.golden @@ -1,4 +1,178 @@ - +- job_name: eks-sample-prometheus/kubernetes-apiserver-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: endpoints + api_server: https://master.eks-sample:443 + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: true + relabel_configs: + - source_labels: [__meta_kubernetes_service_label_component] + regex: apiserver + action: keep + - source_labels: [__meta_kubernetes_endpoint_port_name] + regex: https + action: keep + - target_label: app + replacement: kubernetes + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: gcp + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role +# Add kubelet configuration +- job_name: eks-sample-prometheus/kubelet-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: node + api_server: https://master.eks-sample:443 + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: true + relabel_configs: + - target_label: app + replacement: kubelet + - source_labels: [__address__] + target_label: instance + - target_label: __address__ + replacement: master.eks-sample:443 + - source_labels: [__meta_kubernetes_node_name] + target_label: __metrics_path__ + replacement: /api/v1/nodes/${1}:10250/proxy/metrics + - source_labels: [__meta_kubernetes_node_name] + target_label: node + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: gcp + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role + metric_relabel_configs: + # drop unused rest client metrics + - source_labels: [__name__] + regex: rest_client_(rate_limiter_duration_seconds_bucket|request_size_bytes_bucket|response_size_bytes_bucket) + action: drop + # drop uid label from kubelet + - action: labeldrop + regex: uid +# Add scrape configuration for cadvisor +- job_name: eks-sample-prometheus/cadvisor-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: node + api_server: https://master.eks-sample:443 + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + relabel_configs: + - source_labels: [__address__] + target_label: instance + - target_label: __address__ + replacement: master.eks-sample:443 + - source_labels: [__meta_kubernetes_node_name] + target_label: __metrics_path__ + replacement: /api/v1/nodes/${1}:10250/proxy/metrics/cadvisor + - target_label: app + replacement: cadvisor + # Add node name. + - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname] + target_label: node + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: gcp + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role + metric_relabel_configs: + # drop id and name labels from cAdvisor as they do not provide value but use a lot of RAM + - action: labeldrop + regex: id|name + # dropping explained here https://github.com/giantswarm/giantswarm/issues/26361 + - source_labels: [__name__] + regex: container_(blkio_device_usage_total|network_transmit_errors_total|network_receive_errors_total|tasks_state|memory_failures_total|memory_max_usage_bytes|cpu_load_average_10s|memory_failcnt|cpu_system_seconds_total) + action: drop + - source_labels: [namespace] + regex: (kube-system|giantswarm.*|kong.*|kyverno) + action: keep # calico-node - job_name: eks-sample-prometheus/calico-node-eks-sample/0 honor_labels: true @@ -67,6 +241,239 @@ # Add customer label. - target_label: customer replacement: pmo +# Add etcd configuration +- job_name: eks-sample-prometheus/etcd-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: pod + namespaces: + names: + - kube-system + api_server: https://master.eks-sample:443 + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: true + relabel_configs: + - source_labels: [__meta_kubernetes_pod_container_name] + regex: (etcd) + action: keep + - target_label: __address__ + replacement: master.eks-sample:443 + - source_labels: [__meta_kubernetes_pod_name] + target_label: __metrics_path__ + replacement: /api/v1/namespaces/kube-system/pods/${1}:2381/proxy/metrics + action: replace + - source_labels: [ __meta_kubernetes_pod_name ] + target_label: pod_name + - target_label: app + replacement: etcd + - source_labels: [__address__] + target_label: instance + # Add ip label. + - target_label: ip + source_labels: [__meta_kubernetes_node_address_InternalIP] + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: gcp + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role +# kube-controller-manager +- job_name: eks-sample-prometheus/kubernetes-controller-manager-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: pod + namespaces: + names: + - kube-system + api_server: https://master.eks-sample:443 + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: true + relabel_configs: + - source_labels: [__address__] + replacement: 10257 + target_label: __tmp_port + - source_labels: [__meta_kubernetes_pod_annotationpresent_giantswarm_io_monitoring_port,__meta_kubernetes_pod_annotation_giantswarm_io_monitoring_port] + action: replace + regex: true;(\d+) + replacement: $1 + target_label: __tmp_port + - source_labels: [__address__, __tmp_port] + target_label: instance + regex: (.+);(.+) + replacement: $1:$2 + - source_labels: [__meta_kubernetes_pod_container_name] + regex: (k8s-controller-manager|kube-controller-manager) + action: keep + - target_label: __address__ + replacement: master.eks-sample:443 + - source_labels: [__meta_kubernetes_pod_name, __tmp_port] + target_label: __metrics_path__ + regex: (.+);(\d+) + replacement: /api/v1/namespaces/kube-system/pods/https:${1}:${2}/proxy/metrics + - target_label: app + replacement: kube-controller-manager + # Add namespace label. + - source_labels: [__meta_kubernetes_namespace] + target_label: namespace + # Add pod label. + - source_labels: [__meta_kubernetes_pod_name] + target_label: pod + # Add container label. + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: container + # Add node label. + - source_labels: [__meta_kubernetes_pod_node_name] + target_label: node + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: gcp + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo + metric_relabel_configs: + # drop unused rest client metrics + - source_labels: [__name__] + regex: rest_client_(rate_limiter_duration_seconds_bucket|request_size_bytes_bucket|response_size_bytes_bucket) + action: drop +# kube-scheduler +- job_name: eks-sample-prometheus/kubernetes-scheduler-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: pod + namespaces: + names: + - kube-system + api_server: https://master.eks-sample:443 + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: true + relabel_configs: + - source_labels: [__address__] + replacement: 10259 + target_label: __tmp_port + - source_labels: [__meta_kubernetes_pod_annotationpresent_giantswarm_io_monitoring_port,__meta_kubernetes_pod_annotation_giantswarm_io_monitoring_port] + action: replace + regex: true;(\d+) + replacement: $1 + target_label: __tmp_port + - source_labels: [__address__, __tmp_port] + target_label: instance + regex: (.+);(.+) + replacement: $1:$2 + - source_labels: [__meta_kubernetes_pod_container_name] + regex: (k8s-scheduler|kube-scheduler) + action: keep + - target_label: __address__ + replacement: master.eks-sample:443 + - source_labels: [__meta_kubernetes_pod_name, __tmp_port] + target_label: __metrics_path__ + regex: (.+);(\d+) + replacement: /api/v1/namespaces/kube-system/pods/https:${1}:${2}/proxy/metrics + - target_label: app + replacement: kube-scheduler + # Add namespace label. + - source_labels: [__meta_kubernetes_namespace] + target_label: namespace + # Add pod label. + - source_labels: [__meta_kubernetes_pod_name] + target_label: pod + # Add container label. + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: container + # Add node label. + - source_labels: [__meta_kubernetes_pod_node_name] + target_label: node + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: gcp + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo + metric_relabel_configs: + # drop unused rest client metrics + - source_labels: [__name__] + regex: rest_client_(rate_limiter_duration_seconds_bucket|request_size_bytes_bucket|response_size_bytes_bucket) + action: drop # kube-proxy - job_name: eks-sample-prometheus/kube-proxy-eks-sample/0 honor_labels: true @@ -143,6 +550,84 @@ - source_labels: [__name__] regex: rest_client_(rate_limiter_duration_seconds_bucket|request_size_bytes_bucket|response_size_bytes_bucket) action: drop +# coredns +- job_name: eks-sample-prometheus/coredns-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: endpoints + namespaces: + names: + - kube-system + api_server: https://master.eks-sample:443 + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: true + relabel_configs: + - source_labels: [__address__] + target_label: instance + - source_labels: [__meta_kubernetes_pod_container_name] + regex: coredns + action: keep + - target_label: __address__ + replacement: master.eks-sample:443 + - source_labels: [__meta_kubernetes_pod_name] + regex: (coredns.*) + target_label: __metrics_path__ + replacement: /api/v1/namespaces/kube-system/pods/${1}:9153/proxy/metrics + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: app + - source_labels: [__meta_kubernetes_service_annotationpresent_giantswarm_io_monitoring, __meta_kubernetes_service_labelpresent_giantswarm_io_monitoring] + regex: .*(true).* + action: drop + # Add namespace label. + - source_labels: [__meta_kubernetes_namespace] + target_label: namespace + # Add pod label. + - source_labels: [__meta_kubernetes_pod_name] + target_label: pod + # Add container label. + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: container + # Add node label. + - source_labels: [__meta_kubernetes_pod_node_name] + target_label: node + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: gcp + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo + metric_relabel_configs: + # drop unused coredns metrics with the highest cardinality as they increase Prometheus memory usage + - source_labels: [__name__] + regex: coredns_dns_(response_size_bytes_bucket|request_size_bytes_bucket) + action: drop # cert-exporter - job_name: eks-sample-prometheus/cert-exporter-eks-sample/0 honor_labels: true @@ -216,6 +701,66 @@ # Add customer label. - target_label: customer replacement: pmo +# node-exporter +- job_name: eks-sample-prometheus/node-exporter-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: pod + api_server: https://master.eks-sample:443 + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: true + relabel_configs: + - target_label: __address__ + replacement: master.eks-sample:443 + - source_labels: [__meta_kubernetes_pod_name] + regex: (node-exporter.*) + target_label: __metrics_path__ + replacement: /api/v1/namespaces/kube-system/pods/${1}:10300/proxy/metrics + - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_pod_name] + regex: kube-system;node-exporter.* + action: keep + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: app + - source_labels: [__meta_kubernetes_pod_annotationpresent_giantswarm_io_monitoring, __meta_kubernetes_pod_labelpresent_giantswarm_io_monitoring] + regex: .*(true).* + action: drop + - source_labels: [__meta_kubernetes_pod_node_name] + target_label: node + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: gcp + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo + metric_relabel_configs: + # drop unused metrics with the highest cardinality as they increase Prometheus memory usage + - source_labels: [__name__] + regex: node_(filesystem_files|filesystem_readonly|nfs_requests_total|network_carrier|network_transmit_colls_total|network_carrier_changes_total|network_transmit_packets_total|network_carrier_down_changes_total|network_carrier_up_changes_total|network_iface_id|xfs_.+|ethtool_.+) + action: drop - job_name: eks-sample-prometheus/workload-eks-sample/0 honor_labels: true scheme: https @@ -311,15 +856,6 @@ - target_label: customer replacement: pmo metric_relabel_configs: - - source_labels: [container] - regex: prometheus-operator-app - action: drop - - source_labels: [app] - regex: coredns - action: drop - - source_labels: [app] - regex: kube-state-metrics - action: drop # drop unused nginx metrics with the highest cardinality as they increase Prometheus memory usage - source_labels: [__name__] regex: nginx_ingress_controller_(bytes_sent_bucket|request_size_bucket|response_duration_seconds_bucket|response_size_bucket|request_duration_seconds_count|connect_duration_seconds_bucket|header_duration_seconds_bucket|bytes_sent_count|request_duration_seconds_sum|bytes_sent_sum|request_size_count|response_size_count|response_duration_seconds_sum|response_duration_seconds_count|ingress_upstream_latency_seconds|ingress_upstream_latency_seconds_sum|ingress_upstream_latency_seconds_count) diff --git a/service/controller/resource/monitoring/scrapeconfigs/test/kvm/case-6-cluster-api-eks.golden b/service/controller/resource/monitoring/scrapeconfigs/test/kvm/case-6-cluster-api-eks.golden index 8fd41662d..d0812b3de 100644 --- a/service/controller/resource/monitoring/scrapeconfigs/test/kvm/case-6-cluster-api-eks.golden +++ b/service/controller/resource/monitoring/scrapeconfigs/test/kvm/case-6-cluster-api-eks.golden @@ -1,4 +1,52 @@ - +- job_name: eks-sample-prometheus/kubernetes-apiserver-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: endpoints + api_server: https://master.eks-sample:443 + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: true + relabel_configs: + - source_labels: [__meta_kubernetes_service_label_component] + regex: apiserver + action: keep + - source_labels: [__meta_kubernetes_endpoint_port_name] + regex: https + action: keep + - target_label: app + replacement: kubernetes + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: kvm + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role # Add scrape configuration for docker - job_name: eks-sample-prometheus/docker-eks-sample/0 honor_labels: true @@ -55,6 +103,132 @@ - source_labels: [__name__] regex: (engine_daemon_image_actions_seconds_count|process_virtual_memory_bytes|process_resident_memory_bytes) action: keep +# Add kubelet configuration +- job_name: eks-sample-prometheus/kubelet-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: node + api_server: https://master.eks-sample:443 + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: true + relabel_configs: + - target_label: app + replacement: kubelet + - source_labels: [__address__] + target_label: instance + - target_label: __address__ + replacement: master.eks-sample:443 + - source_labels: [__meta_kubernetes_node_name] + target_label: __metrics_path__ + replacement: /api/v1/nodes/${1}:10250/proxy/metrics + - source_labels: [__meta_kubernetes_node_name] + target_label: node + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: kvm + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role + metric_relabel_configs: + # drop unused rest client metrics + - source_labels: [__name__] + regex: rest_client_(rate_limiter_duration_seconds_bucket|request_size_bytes_bucket|response_size_bytes_bucket) + action: drop + # drop uid label from kubelet + - action: labeldrop + regex: uid +# Add scrape configuration for cadvisor +- job_name: eks-sample-prometheus/cadvisor-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: node + api_server: https://master.eks-sample:443 + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + relabel_configs: + - source_labels: [__address__] + target_label: instance + - target_label: __address__ + replacement: master.eks-sample:443 + - source_labels: [__meta_kubernetes_node_name] + target_label: __metrics_path__ + replacement: /api/v1/nodes/${1}:10250/proxy/metrics/cadvisor + - target_label: app + replacement: cadvisor + # Add node name. + - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname] + target_label: node + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: kvm + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role + metric_relabel_configs: + # drop id and name labels from cAdvisor as they do not provide value but use a lot of RAM + - action: labeldrop + regex: id|name + # dropping explained here https://github.com/giantswarm/giantswarm/issues/26361 + - source_labels: [__name__] + regex: container_(blkio_device_usage_total|network_transmit_errors_total|network_receive_errors_total|tasks_state|memory_failures_total|memory_max_usage_bytes|cpu_load_average_10s|memory_failcnt|cpu_system_seconds_total) + action: drop + - source_labels: [namespace] + regex: (kube-system|giantswarm.*|kong.*|kyverno) + action: keep # calico-node - job_name: eks-sample-prometheus/calico-node-eks-sample/0 honor_labels: true @@ -123,6 +297,64 @@ # Add customer label. - target_label: customer replacement: pmo +# Add etcd configuration +- job_name: eks-sample-prometheus/etcd-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: node + api_server: https://master.eks-sample:443 + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: true + relabel_configs: + - source_labels: [__meta_kubernetes_node_label_role] + regex: control-plane|master + action: keep + # by default use node address + - source_labels: [__address__] + regex: (.*):10250 + target_label: __address__ + replacement: ${1}:2379 + action: replace + - target_label: app + replacement: etcd + - source_labels: [__address__] + target_label: instance + # Add ip label. + - target_label: ip + source_labels: [__meta_kubernetes_node_address_InternalIP] + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: kvm + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role # kube-proxy - job_name: eks-sample-prometheus/kube-proxy-eks-sample/0 honor_labels: true @@ -199,6 +431,84 @@ - source_labels: [__name__] regex: rest_client_(rate_limiter_duration_seconds_bucket|request_size_bytes_bucket|response_size_bytes_bucket) action: drop +# coredns +- job_name: eks-sample-prometheus/coredns-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: endpoints + namespaces: + names: + - kube-system + api_server: https://master.eks-sample:443 + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: true + relabel_configs: + - source_labels: [__address__] + target_label: instance + - source_labels: [__meta_kubernetes_pod_container_name] + regex: coredns + action: keep + - target_label: __address__ + replacement: master.eks-sample:443 + - source_labels: [__meta_kubernetes_pod_name] + regex: (coredns.*) + target_label: __metrics_path__ + replacement: /api/v1/namespaces/kube-system/pods/${1}:9153/proxy/metrics + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: app + - source_labels: [__meta_kubernetes_service_annotationpresent_giantswarm_io_monitoring, __meta_kubernetes_service_labelpresent_giantswarm_io_monitoring] + regex: .*(true).* + action: drop + # Add namespace label. + - source_labels: [__meta_kubernetes_namespace] + target_label: namespace + # Add pod label. + - source_labels: [__meta_kubernetes_pod_name] + target_label: pod + # Add container label. + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: container + # Add node label. + - source_labels: [__meta_kubernetes_pod_node_name] + target_label: node + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: kvm + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo + metric_relabel_configs: + # drop unused coredns metrics with the highest cardinality as they increase Prometheus memory usage + - source_labels: [__name__] + regex: coredns_dns_(response_size_bytes_bucket|request_size_bytes_bucket) + action: drop # cert-exporter - job_name: eks-sample-prometheus/cert-exporter-eks-sample/0 honor_labels: true @@ -272,6 +582,66 @@ # Add customer label. - target_label: customer replacement: pmo +# node-exporter +- job_name: eks-sample-prometheus/node-exporter-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: pod + api_server: https://master.eks-sample:443 + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: true + relabel_configs: + - target_label: __address__ + replacement: master.eks-sample:443 + - source_labels: [__meta_kubernetes_pod_name] + regex: (node-exporter.*) + target_label: __metrics_path__ + replacement: /api/v1/namespaces/kube-system/pods/${1}:10300/proxy/metrics + - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_pod_name] + regex: kube-system;node-exporter.* + action: keep + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: app + - source_labels: [__meta_kubernetes_pod_annotationpresent_giantswarm_io_monitoring, __meta_kubernetes_pod_labelpresent_giantswarm_io_monitoring] + regex: .*(true).* + action: drop + - source_labels: [__meta_kubernetes_pod_node_name] + target_label: node + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: kvm + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo + metric_relabel_configs: + # drop unused metrics with the highest cardinality as they increase Prometheus memory usage + - source_labels: [__name__] + regex: node_(filesystem_files|filesystem_readonly|nfs_requests_total|network_carrier|network_transmit_colls_total|network_carrier_changes_total|network_transmit_packets_total|network_carrier_down_changes_total|network_carrier_up_changes_total|network_iface_id|xfs_.+|ethtool_.+) + action: drop - job_name: eks-sample-prometheus/workload-eks-sample/0 honor_labels: true scheme: https @@ -367,15 +737,6 @@ - target_label: customer replacement: pmo metric_relabel_configs: - - source_labels: [container] - regex: prometheus-operator-app - action: drop - - source_labels: [app] - regex: coredns - action: drop - - source_labels: [app] - regex: kube-state-metrics - action: drop # drop unused nginx metrics with the highest cardinality as they increase Prometheus memory usage - source_labels: [__name__] regex: nginx_ingress_controller_(bytes_sent_bucket|request_size_bucket|response_duration_seconds_bucket|response_size_bucket|request_duration_seconds_count|connect_duration_seconds_bucket|header_duration_seconds_bucket|bytes_sent_count|request_duration_seconds_sum|bytes_sent_sum|request_size_count|response_size_count|response_duration_seconds_sum|response_duration_seconds_count|ingress_upstream_latency_seconds|ingress_upstream_latency_seconds_sum|ingress_upstream_latency_seconds_count) diff --git a/service/controller/resource/monitoring/scrapeconfigs/test/openstack/case-6-cluster-api-eks.golden b/service/controller/resource/monitoring/scrapeconfigs/test/openstack/case-6-cluster-api-eks.golden index 60b981f67..1689fae4f 100644 --- a/service/controller/resource/monitoring/scrapeconfigs/test/openstack/case-6-cluster-api-eks.golden +++ b/service/controller/resource/monitoring/scrapeconfigs/test/openstack/case-6-cluster-api-eks.golden @@ -1,4 +1,178 @@ - +- job_name: eks-sample-prometheus/kubernetes-apiserver-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: endpoints + api_server: https://master.eks-sample:443 + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: true + relabel_configs: + - source_labels: [__meta_kubernetes_service_label_component] + regex: apiserver + action: keep + - source_labels: [__meta_kubernetes_endpoint_port_name] + regex: https + action: keep + - target_label: app + replacement: kubernetes + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: openstack + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role +# Add kubelet configuration +- job_name: eks-sample-prometheus/kubelet-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: node + api_server: https://master.eks-sample:443 + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: true + relabel_configs: + - target_label: app + replacement: kubelet + - source_labels: [__address__] + target_label: instance + - target_label: __address__ + replacement: master.eks-sample:443 + - source_labels: [__meta_kubernetes_node_name] + target_label: __metrics_path__ + replacement: /api/v1/nodes/${1}:10250/proxy/metrics + - source_labels: [__meta_kubernetes_node_name] + target_label: node + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: openstack + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role + metric_relabel_configs: + # drop unused rest client metrics + - source_labels: [__name__] + regex: rest_client_(rate_limiter_duration_seconds_bucket|request_size_bytes_bucket|response_size_bytes_bucket) + action: drop + # drop uid label from kubelet + - action: labeldrop + regex: uid +# Add scrape configuration for cadvisor +- job_name: eks-sample-prometheus/cadvisor-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: node + api_server: https://master.eks-sample:443 + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + relabel_configs: + - source_labels: [__address__] + target_label: instance + - target_label: __address__ + replacement: master.eks-sample:443 + - source_labels: [__meta_kubernetes_node_name] + target_label: __metrics_path__ + replacement: /api/v1/nodes/${1}:10250/proxy/metrics/cadvisor + - target_label: app + replacement: cadvisor + # Add node name. + - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname] + target_label: node + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: openstack + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role + metric_relabel_configs: + # drop id and name labels from cAdvisor as they do not provide value but use a lot of RAM + - action: labeldrop + regex: id|name + # dropping explained here https://github.com/giantswarm/giantswarm/issues/26361 + - source_labels: [__name__] + regex: container_(blkio_device_usage_total|network_transmit_errors_total|network_receive_errors_total|tasks_state|memory_failures_total|memory_max_usage_bytes|cpu_load_average_10s|memory_failcnt|cpu_system_seconds_total) + action: drop + - source_labels: [namespace] + regex: (kube-system|giantswarm.*|kong.*|kyverno) + action: keep # calico-node - job_name: eks-sample-prometheus/calico-node-eks-sample/0 honor_labels: true @@ -67,6 +241,239 @@ # Add customer label. - target_label: customer replacement: pmo +# Add etcd configuration +- job_name: eks-sample-prometheus/etcd-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: pod + namespaces: + names: + - kube-system + api_server: https://master.eks-sample:443 + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: true + relabel_configs: + - source_labels: [__meta_kubernetes_pod_container_name] + regex: (etcd) + action: keep + - target_label: __address__ + replacement: master.eks-sample:443 + - source_labels: [__meta_kubernetes_pod_name] + target_label: __metrics_path__ + replacement: /api/v1/namespaces/kube-system/pods/${1}:2381/proxy/metrics + action: replace + - source_labels: [ __meta_kubernetes_pod_name ] + target_label: pod_name + - target_label: app + replacement: etcd + - source_labels: [__address__] + target_label: instance + # Add ip label. + - target_label: ip + source_labels: [__meta_kubernetes_node_address_InternalIP] + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: openstack + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role +# kube-controller-manager +- job_name: eks-sample-prometheus/kubernetes-controller-manager-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: pod + namespaces: + names: + - kube-system + api_server: https://master.eks-sample:443 + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: true + relabel_configs: + - source_labels: [__address__] + replacement: 10257 + target_label: __tmp_port + - source_labels: [__meta_kubernetes_pod_annotationpresent_giantswarm_io_monitoring_port,__meta_kubernetes_pod_annotation_giantswarm_io_monitoring_port] + action: replace + regex: true;(\d+) + replacement: $1 + target_label: __tmp_port + - source_labels: [__address__, __tmp_port] + target_label: instance + regex: (.+);(.+) + replacement: $1:$2 + - source_labels: [__meta_kubernetes_pod_container_name] + regex: (k8s-controller-manager|kube-controller-manager) + action: keep + - target_label: __address__ + replacement: master.eks-sample:443 + - source_labels: [__meta_kubernetes_pod_name, __tmp_port] + target_label: __metrics_path__ + regex: (.+);(\d+) + replacement: /api/v1/namespaces/kube-system/pods/https:${1}:${2}/proxy/metrics + - target_label: app + replacement: kube-controller-manager + # Add namespace label. + - source_labels: [__meta_kubernetes_namespace] + target_label: namespace + # Add pod label. + - source_labels: [__meta_kubernetes_pod_name] + target_label: pod + # Add container label. + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: container + # Add node label. + - source_labels: [__meta_kubernetes_pod_node_name] + target_label: node + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: openstack + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo + metric_relabel_configs: + # drop unused rest client metrics + - source_labels: [__name__] + regex: rest_client_(rate_limiter_duration_seconds_bucket|request_size_bytes_bucket|response_size_bytes_bucket) + action: drop +# kube-scheduler +- job_name: eks-sample-prometheus/kubernetes-scheduler-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: pod + namespaces: + names: + - kube-system + api_server: https://master.eks-sample:443 + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: true + relabel_configs: + - source_labels: [__address__] + replacement: 10259 + target_label: __tmp_port + - source_labels: [__meta_kubernetes_pod_annotationpresent_giantswarm_io_monitoring_port,__meta_kubernetes_pod_annotation_giantswarm_io_monitoring_port] + action: replace + regex: true;(\d+) + replacement: $1 + target_label: __tmp_port + - source_labels: [__address__, __tmp_port] + target_label: instance + regex: (.+);(.+) + replacement: $1:$2 + - source_labels: [__meta_kubernetes_pod_container_name] + regex: (k8s-scheduler|kube-scheduler) + action: keep + - target_label: __address__ + replacement: master.eks-sample:443 + - source_labels: [__meta_kubernetes_pod_name, __tmp_port] + target_label: __metrics_path__ + regex: (.+);(\d+) + replacement: /api/v1/namespaces/kube-system/pods/https:${1}:${2}/proxy/metrics + - target_label: app + replacement: kube-scheduler + # Add namespace label. + - source_labels: [__meta_kubernetes_namespace] + target_label: namespace + # Add pod label. + - source_labels: [__meta_kubernetes_pod_name] + target_label: pod + # Add container label. + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: container + # Add node label. + - source_labels: [__meta_kubernetes_pod_node_name] + target_label: node + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: openstack + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo + metric_relabel_configs: + # drop unused rest client metrics + - source_labels: [__name__] + regex: rest_client_(rate_limiter_duration_seconds_bucket|request_size_bytes_bucket|response_size_bytes_bucket) + action: drop # kube-proxy - job_name: eks-sample-prometheus/kube-proxy-eks-sample/0 honor_labels: true @@ -143,6 +550,84 @@ - source_labels: [__name__] regex: rest_client_(rate_limiter_duration_seconds_bucket|request_size_bytes_bucket|response_size_bytes_bucket) action: drop +# coredns +- job_name: eks-sample-prometheus/coredns-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: endpoints + namespaces: + names: + - kube-system + api_server: https://master.eks-sample:443 + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: true + relabel_configs: + - source_labels: [__address__] + target_label: instance + - source_labels: [__meta_kubernetes_pod_container_name] + regex: coredns + action: keep + - target_label: __address__ + replacement: master.eks-sample:443 + - source_labels: [__meta_kubernetes_pod_name] + regex: (coredns.*) + target_label: __metrics_path__ + replacement: /api/v1/namespaces/kube-system/pods/${1}:9153/proxy/metrics + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: app + - source_labels: [__meta_kubernetes_service_annotationpresent_giantswarm_io_monitoring, __meta_kubernetes_service_labelpresent_giantswarm_io_monitoring] + regex: .*(true).* + action: drop + # Add namespace label. + - source_labels: [__meta_kubernetes_namespace] + target_label: namespace + # Add pod label. + - source_labels: [__meta_kubernetes_pod_name] + target_label: pod + # Add container label. + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: container + # Add node label. + - source_labels: [__meta_kubernetes_pod_node_name] + target_label: node + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: openstack + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo + metric_relabel_configs: + # drop unused coredns metrics with the highest cardinality as they increase Prometheus memory usage + - source_labels: [__name__] + regex: coredns_dns_(response_size_bytes_bucket|request_size_bytes_bucket) + action: drop # cert-exporter - job_name: eks-sample-prometheus/cert-exporter-eks-sample/0 honor_labels: true @@ -216,6 +701,66 @@ # Add customer label. - target_label: customer replacement: pmo +# node-exporter +- job_name: eks-sample-prometheus/node-exporter-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: pod + api_server: https://master.eks-sample:443 + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: true + relabel_configs: + - target_label: __address__ + replacement: master.eks-sample:443 + - source_labels: [__meta_kubernetes_pod_name] + regex: (node-exporter.*) + target_label: __metrics_path__ + replacement: /api/v1/namespaces/kube-system/pods/${1}:10300/proxy/metrics + - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_pod_name] + regex: kube-system;node-exporter.* + action: keep + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: app + - source_labels: [__meta_kubernetes_pod_annotationpresent_giantswarm_io_monitoring, __meta_kubernetes_pod_labelpresent_giantswarm_io_monitoring] + regex: .*(true).* + action: drop + - source_labels: [__meta_kubernetes_pod_node_name] + target_label: node + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: openstack + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo + metric_relabel_configs: + # drop unused metrics with the highest cardinality as they increase Prometheus memory usage + - source_labels: [__name__] + regex: node_(filesystem_files|filesystem_readonly|nfs_requests_total|network_carrier|network_transmit_colls_total|network_carrier_changes_total|network_transmit_packets_total|network_carrier_down_changes_total|network_carrier_up_changes_total|network_iface_id|xfs_.+|ethtool_.+) + action: drop - job_name: eks-sample-prometheus/workload-eks-sample/0 honor_labels: true scheme: https @@ -311,15 +856,6 @@ - target_label: customer replacement: pmo metric_relabel_configs: - - source_labels: [container] - regex: prometheus-operator-app - action: drop - - source_labels: [app] - regex: coredns - action: drop - - source_labels: [app] - regex: kube-state-metrics - action: drop # drop unused nginx metrics with the highest cardinality as they increase Prometheus memory usage - source_labels: [__name__] regex: nginx_ingress_controller_(bytes_sent_bucket|request_size_bucket|response_duration_seconds_bucket|response_size_bucket|request_duration_seconds_count|connect_duration_seconds_bucket|header_duration_seconds_bucket|bytes_sent_count|request_duration_seconds_sum|bytes_sent_sum|request_size_count|response_size_count|response_duration_seconds_sum|response_duration_seconds_count|ingress_upstream_latency_seconds|ingress_upstream_latency_seconds_sum|ingress_upstream_latency_seconds_count) diff --git a/service/key/key.go b/service/key/key.go index 5b50d094e..051e9ffca 100644 --- a/service/key/key.go +++ b/service/key/key.go @@ -295,17 +295,10 @@ func IsManagementCluster(installation string, obj interface{}) bool { } func IsEKSCluster(obj interface{}) bool { - switch v := obj.(type) { - case *v1.Service: - return false - case *capi.Cluster: - if v.Spec.InfrastructureRef.Kind == "AWSManagedCluster" { - return true - } - return false - default: - return false + if c, ok := obj.(capi.Cluster); ok { + return c.Spec.InfrastructureRef.Kind == "AWSManagedCluster" } + return false } func ClusterType(installation string, obj interface{}) string { From 484df0e209d5956954bcaf4e6ad52db1a0f0ce52 Mon Sep 17 00:00:00 2001 From: Marie Roque Date: Thu, 21 Sep 2023 10:00:09 +0200 Subject: [PATCH 3/4] Fix test --- .../test/notification-template/case-6-cluster-api-eks.golden | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/service/controller/resource/alerting/alertmanagerconfig/test/notification-template/case-6-cluster-api-eks.golden b/service/controller/resource/alerting/alertmanagerconfig/test/notification-template/case-6-cluster-api-eks.golden index 9b85e2b02..7e8e54c13 100644 --- a/service/controller/resource/alerting/alertmanagerconfig/test/notification-template/case-6-cluster-api-eks.golden +++ b/service/controller/resource/alerting/alertmanagerconfig/test/notification-template/case-6-cluster-api-eks.golden @@ -1,6 +1,6 @@ {{ define "__alertmanager" }}Alertmanager{{ end }} {{ define "__alertmanagerurl" }}{{ .ExternalURL }}/#/alerts?receiver={{ .Receiver }}&silenced=false&inhibited=false&active=true&filter=%7Balertname%3D%22{{ .CommonLabels.alertname }}%22%7D{{ end }} -{{ define "__dashboardurl" -}}https://grafana/d/{{ (index .Alerts 0).Annotations.dashboard }}{{- end }} +{{ define "__dashboardurl" -}}{{ if hasPrefix "https://" (index .Alerts 0).Annotations.dashboard }}{{ (index .Alerts 0).Annotations.dashboard }}{{ else }}https://grafana/d/{{ (index .Alerts 0).Annotations.dashboard }}{{ end }}{{- end }} {{ define "__runbookurl" -}}https://intranet.giantswarm.io/docs/support-and-ops/ops-recipes/{{ (index .Alerts 0).Annotations.opsrecipe }}{{- end }} {{ define "slack.default.title" }}{{ .Status | toUpper }}[{{ if eq .Status "firing" }}{{ .Alerts.Firing | len }}{{- else }}{{ .Alerts.Resolved | len }}{{- end }}] {{ (index .Alerts 0).Labels.alertname }} - Team {{ (index .Alerts 0).Labels.team }}{{ end }} From 7e5d6f942b1596faab0d74f00a4bcb74c8a5b3da Mon Sep 17 00:00:00 2001 From: Marie Roque Date: Thu, 21 Sep 2023 10:50:43 +0200 Subject: [PATCH 4/4] Fix isEKSCluster function --- .../test/aws/case-6-cluster-api-eks.golden | 370 +----------- .../test/azure/case-6-cluster-api-eks.golden | 381 +----------- .../test/capa/case-6-cluster-api-eks.golden | 556 +----------------- .../test/gcp/case-6-cluster-api-eks.golden | 556 +----------------- .../test/kvm/case-6-cluster-api-eks.golden | 381 +----------- .../openstack/case-6-cluster-api-eks.golden | 556 +----------------- service/key/key.go | 2 +- 7 files changed, 61 insertions(+), 2741 deletions(-) diff --git a/service/controller/resource/monitoring/scrapeconfigs/test/aws/case-6-cluster-api-eks.golden b/service/controller/resource/monitoring/scrapeconfigs/test/aws/case-6-cluster-api-eks.golden index 991a8bdfa..48f080cf6 100644 --- a/service/controller/resource/monitoring/scrapeconfigs/test/aws/case-6-cluster-api-eks.golden +++ b/service/controller/resource/monitoring/scrapeconfigs/test/aws/case-6-cluster-api-eks.golden @@ -1,50 +1,4 @@ -- job_name: eks-sample-prometheus/kubernetes-apiserver-eks-sample/0 - honor_labels: true - scheme: https - kubernetes_sd_configs: - - role: endpoints - api_server: https://master.eks-sample:443 - bearer_token_file: /etc/prometheus/secrets/cluster-certificates/token - tls_config: - ca_file: /etc/prometheus/secrets/cluster-certificates/ca - insecure_skip_verify: false - bearer_token_file: /etc/prometheus/secrets/cluster-certificates/token - tls_config: - ca_file: /etc/prometheus/secrets/cluster-certificates/ca - insecure_skip_verify: true - relabel_configs: - - source_labels: [__meta_kubernetes_service_label_component] - regex: apiserver - action: keep - - source_labels: [__meta_kubernetes_endpoint_port_name] - regex: https - action: keep - - target_label: app - replacement: kubernetes - # Add cluster_id label. - - target_label: cluster_id - replacement: eks-sample - # Add cluster_type label. - - target_label: cluster_type - replacement: workload_cluster - # Add provider label. - - target_label: provider - replacement: aws - # Add installation label. - - target_label: installation - replacement: test-installation - # Add priority label. - - target_label: service_priority - replacement: highest - # Add organization label. - - target_label: organization - replacement: my-organization - # Add customer label. - - target_label: customer - replacement: pmo - # Add role label. - - source_labels: [__meta_kubernetes_node_label_role] - target_label: role + # Add scrape configuration for docker - job_name: eks-sample-prometheus/docker-eks-sample/0 honor_labels: true @@ -99,128 +53,6 @@ - source_labels: [__name__] regex: (engine_daemon_image_actions_seconds_count|process_virtual_memory_bytes|process_resident_memory_bytes) action: keep -# Add kubelet configuration -- job_name: eks-sample-prometheus/kubelet-eks-sample/0 - honor_labels: true - scheme: https - kubernetes_sd_configs: - - role: node - api_server: https://master.eks-sample:443 - bearer_token_file: /etc/prometheus/secrets/cluster-certificates/token - tls_config: - ca_file: /etc/prometheus/secrets/cluster-certificates/ca - insecure_skip_verify: false - bearer_token_file: /etc/prometheus/secrets/cluster-certificates/token - tls_config: - ca_file: /etc/prometheus/secrets/cluster-certificates/ca - insecure_skip_verify: true - relabel_configs: - - target_label: app - replacement: kubelet - - source_labels: [__address__] - target_label: instance - - target_label: __address__ - replacement: master.eks-sample:443 - - source_labels: [__meta_kubernetes_node_name] - target_label: __metrics_path__ - replacement: /api/v1/nodes/${1}:10250/proxy/metrics - - source_labels: [__meta_kubernetes_node_name] - target_label: node - # Add cluster_id label. - - target_label: cluster_id - replacement: eks-sample - # Add cluster_type label. - - target_label: cluster_type - replacement: workload_cluster - # Add provider label. - - target_label: provider - replacement: aws - # Add installation label. - - target_label: installation - replacement: test-installation - # Add priority label. - - target_label: service_priority - replacement: highest - # Add organization label. - - target_label: organization - replacement: my-organization - # Add customer label. - - target_label: customer - replacement: pmo - # Add role label. - - source_labels: [__meta_kubernetes_node_label_role] - target_label: role - metric_relabel_configs: - # drop unused rest client metrics - - source_labels: [__name__] - regex: rest_client_(rate_limiter_duration_seconds_bucket|request_size_bytes_bucket|response_size_bytes_bucket) - action: drop - # drop uid label from kubelet - - action: labeldrop - regex: uid -# Add scrape configuration for cadvisor -- job_name: eks-sample-prometheus/cadvisor-eks-sample/0 - honor_labels: true - scheme: https - kubernetes_sd_configs: - - role: node - api_server: https://master.eks-sample:443 - bearer_token_file: /etc/prometheus/secrets/cluster-certificates/token - tls_config: - ca_file: /etc/prometheus/secrets/cluster-certificates/ca - insecure_skip_verify: false - bearer_token_file: /etc/prometheus/secrets/cluster-certificates/token - tls_config: - ca_file: /etc/prometheus/secrets/cluster-certificates/ca - insecure_skip_verify: false - relabel_configs: - - source_labels: [__address__] - target_label: instance - - target_label: __address__ - replacement: master.eks-sample:443 - - source_labels: [__meta_kubernetes_node_name] - target_label: __metrics_path__ - replacement: /api/v1/nodes/${1}:10250/proxy/metrics/cadvisor - - target_label: app - replacement: cadvisor - # Add node name. - - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname] - target_label: node - # Add cluster_id label. - - target_label: cluster_id - replacement: eks-sample - # Add cluster_type label. - - target_label: cluster_type - replacement: workload_cluster - # Add provider label. - - target_label: provider - replacement: aws - # Add installation label. - - target_label: installation - replacement: test-installation - # Add priority label. - - target_label: service_priority - replacement: highest - # Add organization label. - - target_label: organization - replacement: my-organization - # Add customer label. - - target_label: customer - replacement: pmo - # Add role label. - - source_labels: [__meta_kubernetes_node_label_role] - target_label: role - metric_relabel_configs: - # drop id and name labels from cAdvisor as they do not provide value but use a lot of RAM - - action: labeldrop - regex: id|name - # dropping explained here https://github.com/giantswarm/giantswarm/issues/26361 - - source_labels: [__name__] - regex: container_(blkio_device_usage_total|network_transmit_errors_total|network_receive_errors_total|tasks_state|memory_failures_total|memory_max_usage_bytes|cpu_load_average_10s|memory_failcnt|cpu_system_seconds_total) - action: drop - - source_labels: [namespace] - regex: (kube-system|giantswarm.*|kong.*|kyverno) - action: keep # calico-node - job_name: eks-sample-prometheus/calico-node-eks-sample/0 honor_labels: true @@ -287,63 +119,6 @@ # Add customer label. - target_label: customer replacement: pmo -# Add etcd configuration -- job_name: eks-sample-prometheus/etcd-eks-sample/0 - honor_labels: true - scheme: https - kubernetes_sd_configs: - - role: node - api_server: https://master.eks-sample:443 - bearer_token_file: /etc/prometheus/secrets/cluster-certificates/token - tls_config: - ca_file: /etc/prometheus/secrets/cluster-certificates/ca - insecure_skip_verify: false - tls_config: - ca_file: /etc/prometheus/secrets/cluster-certificates/ca - cert_file: /etc/prometheus/secrets/cluster-certificates/crt - key_file: /etc/prometheus/secrets/cluster-certificates/key - insecure_skip_verify: true - relabel_configs: - - source_labels: [__meta_kubernetes_node_label_role] - regex: control-plane|master - action: keep - # by default use node address - - source_labels: [__address__] - regex: (.*):10250 - target_label: __address__ - replacement: ${1}:2379 - action: replace - - target_label: app - replacement: etcd - - source_labels: [__address__] - target_label: instance - # Add ip label. - - target_label: ip - source_labels: [__meta_kubernetes_node_address_InternalIP] - # Add cluster_id label. - - target_label: cluster_id - replacement: eks-sample - # Add cluster_type label. - - target_label: cluster_type - replacement: workload_cluster - # Add provider label. - - target_label: provider - replacement: aws - # Add installation label. - - target_label: installation - replacement: test-installation - # Add priority label. - - target_label: service_priority - replacement: highest - # Add organization label. - - target_label: organization - replacement: my-organization - # Add customer label. - - target_label: customer - replacement: pmo - # Add role label. - - source_labels: [__meta_kubernetes_node_label_role] - target_label: role # kube-proxy - job_name: eks-sample-prometheus/kube-proxy-eks-sample/0 honor_labels: true @@ -418,82 +193,6 @@ - source_labels: [__name__] regex: rest_client_(rate_limiter_duration_seconds_bucket|request_size_bytes_bucket|response_size_bytes_bucket) action: drop -# coredns -- job_name: eks-sample-prometheus/coredns-eks-sample/0 - honor_labels: true - scheme: https - kubernetes_sd_configs: - - role: endpoints - namespaces: - names: - - kube-system - api_server: https://master.eks-sample:443 - bearer_token_file: /etc/prometheus/secrets/cluster-certificates/token - tls_config: - ca_file: /etc/prometheus/secrets/cluster-certificates/ca - insecure_skip_verify: false - bearer_token_file: /etc/prometheus/secrets/cluster-certificates/token - tls_config: - ca_file: /etc/prometheus/secrets/cluster-certificates/ca - insecure_skip_verify: true - relabel_configs: - - source_labels: [__address__] - target_label: instance - - source_labels: [__meta_kubernetes_pod_container_name] - regex: coredns - action: keep - - target_label: __address__ - replacement: master.eks-sample:443 - - source_labels: [__meta_kubernetes_pod_name] - regex: (coredns.*) - target_label: __metrics_path__ - replacement: /api/v1/namespaces/kube-system/pods/${1}:9153/proxy/metrics - - source_labels: [__meta_kubernetes_pod_container_name] - target_label: app - - source_labels: [__meta_kubernetes_service_annotationpresent_giantswarm_io_monitoring, __meta_kubernetes_service_labelpresent_giantswarm_io_monitoring] - regex: .*(true).* - action: drop - # Add namespace label. - - source_labels: [__meta_kubernetes_namespace] - target_label: namespace - # Add pod label. - - source_labels: [__meta_kubernetes_pod_name] - target_label: pod - # Add container label. - - source_labels: [__meta_kubernetes_pod_container_name] - target_label: container - # Add node label. - - source_labels: [__meta_kubernetes_pod_node_name] - target_label: node - # Add role label. - - source_labels: [__meta_kubernetes_node_label_role] - target_label: role - # Add cluster_id label. - - target_label: cluster_id - replacement: eks-sample - # Add cluster_type label. - - target_label: cluster_type - replacement: workload_cluster - # Add provider label. - - target_label: provider - replacement: aws - # Add installation label. - - target_label: installation - replacement: test-installation - # Add priority label. - - target_label: service_priority - replacement: highest - # Add organization label. - - target_label: organization - replacement: my-organization - # Add customer label. - - target_label: customer - replacement: pmo - metric_relabel_configs: - # drop unused coredns metrics with the highest cardinality as they increase Prometheus memory usage - - source_labels: [__name__] - regex: coredns_dns_(response_size_bytes_bucket|request_size_bytes_bucket) - action: drop # cert-exporter - job_name: eks-sample-prometheus/cert-exporter-eks-sample/0 honor_labels: true @@ -565,64 +264,6 @@ # Add customer label. - target_label: customer replacement: pmo -# node-exporter -- job_name: eks-sample-prometheus/node-exporter-eks-sample/0 - honor_labels: true - scheme: https - kubernetes_sd_configs: - - role: pod - api_server: https://master.eks-sample:443 - bearer_token_file: /etc/prometheus/secrets/cluster-certificates/token - tls_config: - ca_file: /etc/prometheus/secrets/cluster-certificates/ca - insecure_skip_verify: false - bearer_token_file: /etc/prometheus/secrets/cluster-certificates/token - tls_config: - ca_file: /etc/prometheus/secrets/cluster-certificates/ca - insecure_skip_verify: true - relabel_configs: - - target_label: __address__ - replacement: master.eks-sample:443 - - source_labels: [__meta_kubernetes_pod_name] - regex: (node-exporter.*) - target_label: __metrics_path__ - replacement: /api/v1/namespaces/kube-system/pods/${1}:10300/proxy/metrics - - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_pod_name] - regex: kube-system;node-exporter.* - action: keep - - source_labels: [__meta_kubernetes_pod_container_name] - target_label: app - - source_labels: [__meta_kubernetes_pod_annotationpresent_giantswarm_io_monitoring, __meta_kubernetes_pod_labelpresent_giantswarm_io_monitoring] - regex: .*(true).* - action: drop - - source_labels: [__meta_kubernetes_pod_node_name] - target_label: node - # Add cluster_id label. - - target_label: cluster_id - replacement: eks-sample - # Add cluster_type label. - - target_label: cluster_type - replacement: workload_cluster - # Add provider label. - - target_label: provider - replacement: aws - # Add installation label. - - target_label: installation - replacement: test-installation - # Add priority label. - - target_label: service_priority - replacement: highest - # Add organization label. - - target_label: organization - replacement: my-organization - # Add customer label. - - target_label: customer - replacement: pmo - metric_relabel_configs: - # drop unused metrics with the highest cardinality as they increase Prometheus memory usage - - source_labels: [__name__] - regex: node_(filesystem_files|filesystem_readonly|nfs_requests_total|network_carrier|network_transmit_colls_total|network_carrier_changes_total|network_transmit_packets_total|network_carrier_down_changes_total|network_carrier_up_changes_total|network_iface_id|xfs_.+|ethtool_.+) - action: drop - job_name: eks-sample-prometheus/workload-eks-sample/0 honor_labels: true scheme: https @@ -716,6 +357,15 @@ - target_label: customer replacement: pmo metric_relabel_configs: + - source_labels: [container] + regex: prometheus-operator-app + action: drop + - source_labels: [app] + regex: coredns + action: drop + - source_labels: [app] + regex: kube-state-metrics + action: drop # drop unused nginx metrics with the highest cardinality as they increase Prometheus memory usage - source_labels: [__name__] regex: nginx_ingress_controller_(bytes_sent_bucket|request_size_bucket|response_duration_seconds_bucket|response_size_bucket|request_duration_seconds_count|connect_duration_seconds_bucket|header_duration_seconds_bucket|bytes_sent_count|request_duration_seconds_sum|bytes_sent_sum|request_size_count|response_size_count|response_duration_seconds_sum|response_duration_seconds_count|ingress_upstream_latency_seconds|ingress_upstream_latency_seconds_sum|ingress_upstream_latency_seconds_count) diff --git a/service/controller/resource/monitoring/scrapeconfigs/test/azure/case-6-cluster-api-eks.golden b/service/controller/resource/monitoring/scrapeconfigs/test/azure/case-6-cluster-api-eks.golden index 04b9d2194..d573fa28b 100644 --- a/service/controller/resource/monitoring/scrapeconfigs/test/azure/case-6-cluster-api-eks.golden +++ b/service/controller/resource/monitoring/scrapeconfigs/test/azure/case-6-cluster-api-eks.golden @@ -1,52 +1,4 @@ -- job_name: eks-sample-prometheus/kubernetes-apiserver-eks-sample/0 - honor_labels: true - scheme: https - kubernetes_sd_configs: - - role: endpoints - api_server: https://master.eks-sample:443 - tls_config: - ca_file: /etc/prometheus/secrets/cluster-certificates/ca - cert_file: /etc/prometheus/secrets/cluster-certificates/crt - key_file: /etc/prometheus/secrets/cluster-certificates/key - insecure_skip_verify: false - tls_config: - ca_file: /etc/prometheus/secrets/cluster-certificates/ca - cert_file: /etc/prometheus/secrets/cluster-certificates/crt - key_file: /etc/prometheus/secrets/cluster-certificates/key - insecure_skip_verify: true - relabel_configs: - - source_labels: [__meta_kubernetes_service_label_component] - regex: apiserver - action: keep - - source_labels: [__meta_kubernetes_endpoint_port_name] - regex: https - action: keep - - target_label: app - replacement: kubernetes - # Add cluster_id label. - - target_label: cluster_id - replacement: eks-sample - # Add cluster_type label. - - target_label: cluster_type - replacement: workload_cluster - # Add provider label. - - target_label: provider - replacement: azure - # Add installation label. - - target_label: installation - replacement: test-installation - # Add priority label. - - target_label: service_priority - replacement: highest - # Add organization label. - - target_label: organization - replacement: my-organization - # Add customer label. - - target_label: customer - replacement: pmo - # Add role label. - - source_labels: [__meta_kubernetes_node_label_role] - target_label: role + # Add scrape configuration for docker - job_name: eks-sample-prometheus/docker-eks-sample/0 honor_labels: true @@ -103,132 +55,6 @@ - source_labels: [__name__] regex: (engine_daemon_image_actions_seconds_count|process_virtual_memory_bytes|process_resident_memory_bytes) action: keep -# Add kubelet configuration -- job_name: eks-sample-prometheus/kubelet-eks-sample/0 - honor_labels: true - scheme: https - kubernetes_sd_configs: - - role: node - api_server: https://master.eks-sample:443 - tls_config: - ca_file: /etc/prometheus/secrets/cluster-certificates/ca - cert_file: /etc/prometheus/secrets/cluster-certificates/crt - key_file: /etc/prometheus/secrets/cluster-certificates/key - insecure_skip_verify: false - tls_config: - ca_file: /etc/prometheus/secrets/cluster-certificates/ca - cert_file: /etc/prometheus/secrets/cluster-certificates/crt - key_file: /etc/prometheus/secrets/cluster-certificates/key - insecure_skip_verify: true - relabel_configs: - - target_label: app - replacement: kubelet - - source_labels: [__address__] - target_label: instance - - target_label: __address__ - replacement: master.eks-sample:443 - - source_labels: [__meta_kubernetes_node_name] - target_label: __metrics_path__ - replacement: /api/v1/nodes/${1}:10250/proxy/metrics - - source_labels: [__meta_kubernetes_node_name] - target_label: node - # Add cluster_id label. - - target_label: cluster_id - replacement: eks-sample - # Add cluster_type label. - - target_label: cluster_type - replacement: workload_cluster - # Add provider label. - - target_label: provider - replacement: azure - # Add installation label. - - target_label: installation - replacement: test-installation - # Add priority label. - - target_label: service_priority - replacement: highest - # Add organization label. - - target_label: organization - replacement: my-organization - # Add customer label. - - target_label: customer - replacement: pmo - # Add role label. - - source_labels: [__meta_kubernetes_node_label_role] - target_label: role - metric_relabel_configs: - # drop unused rest client metrics - - source_labels: [__name__] - regex: rest_client_(rate_limiter_duration_seconds_bucket|request_size_bytes_bucket|response_size_bytes_bucket) - action: drop - # drop uid label from kubelet - - action: labeldrop - regex: uid -# Add scrape configuration for cadvisor -- job_name: eks-sample-prometheus/cadvisor-eks-sample/0 - honor_labels: true - scheme: https - kubernetes_sd_configs: - - role: node - api_server: https://master.eks-sample:443 - tls_config: - ca_file: /etc/prometheus/secrets/cluster-certificates/ca - cert_file: /etc/prometheus/secrets/cluster-certificates/crt - key_file: /etc/prometheus/secrets/cluster-certificates/key - insecure_skip_verify: false - tls_config: - ca_file: /etc/prometheus/secrets/cluster-certificates/ca - cert_file: /etc/prometheus/secrets/cluster-certificates/crt - key_file: /etc/prometheus/secrets/cluster-certificates/key - insecure_skip_verify: false - relabel_configs: - - source_labels: [__address__] - target_label: instance - - target_label: __address__ - replacement: master.eks-sample:443 - - source_labels: [__meta_kubernetes_node_name] - target_label: __metrics_path__ - replacement: /api/v1/nodes/${1}:10250/proxy/metrics/cadvisor - - target_label: app - replacement: cadvisor - # Add node name. - - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname] - target_label: node - # Add cluster_id label. - - target_label: cluster_id - replacement: eks-sample - # Add cluster_type label. - - target_label: cluster_type - replacement: workload_cluster - # Add provider label. - - target_label: provider - replacement: azure - # Add installation label. - - target_label: installation - replacement: test-installation - # Add priority label. - - target_label: service_priority - replacement: highest - # Add organization label. - - target_label: organization - replacement: my-organization - # Add customer label. - - target_label: customer - replacement: pmo - # Add role label. - - source_labels: [__meta_kubernetes_node_label_role] - target_label: role - metric_relabel_configs: - # drop id and name labels from cAdvisor as they do not provide value but use a lot of RAM - - action: labeldrop - regex: id|name - # dropping explained here https://github.com/giantswarm/giantswarm/issues/26361 - - source_labels: [__name__] - regex: container_(blkio_device_usage_total|network_transmit_errors_total|network_receive_errors_total|tasks_state|memory_failures_total|memory_max_usage_bytes|cpu_load_average_10s|memory_failcnt|cpu_system_seconds_total) - action: drop - - source_labels: [namespace] - regex: (kube-system|giantswarm.*|kong.*|kyverno) - action: keep # calico-node - job_name: eks-sample-prometheus/calico-node-eks-sample/0 honor_labels: true @@ -297,64 +123,6 @@ # Add customer label. - target_label: customer replacement: pmo -# Add etcd configuration -- job_name: eks-sample-prometheus/etcd-eks-sample/0 - honor_labels: true - scheme: https - kubernetes_sd_configs: - - role: node - api_server: https://master.eks-sample:443 - tls_config: - ca_file: /etc/prometheus/secrets/cluster-certificates/ca - cert_file: /etc/prometheus/secrets/cluster-certificates/crt - key_file: /etc/prometheus/secrets/cluster-certificates/key - insecure_skip_verify: false - tls_config: - ca_file: /etc/prometheus/secrets/cluster-certificates/ca - cert_file: /etc/prometheus/secrets/cluster-certificates/crt - key_file: /etc/prometheus/secrets/cluster-certificates/key - insecure_skip_verify: true - relabel_configs: - - source_labels: [__meta_kubernetes_node_label_role] - regex: control-plane|master - action: keep - # by default use node address - - source_labels: [__address__] - regex: (.*):10250 - target_label: __address__ - replacement: ${1}:2379 - action: replace - - target_label: app - replacement: etcd - - source_labels: [__address__] - target_label: instance - # Add ip label. - - target_label: ip - source_labels: [__meta_kubernetes_node_address_InternalIP] - # Add cluster_id label. - - target_label: cluster_id - replacement: eks-sample - # Add cluster_type label. - - target_label: cluster_type - replacement: workload_cluster - # Add provider label. - - target_label: provider - replacement: azure - # Add installation label. - - target_label: installation - replacement: test-installation - # Add priority label. - - target_label: service_priority - replacement: highest - # Add organization label. - - target_label: organization - replacement: my-organization - # Add customer label. - - target_label: customer - replacement: pmo - # Add role label. - - source_labels: [__meta_kubernetes_node_label_role] - target_label: role # kube-proxy - job_name: eks-sample-prometheus/kube-proxy-eks-sample/0 honor_labels: true @@ -431,84 +199,6 @@ - source_labels: [__name__] regex: rest_client_(rate_limiter_duration_seconds_bucket|request_size_bytes_bucket|response_size_bytes_bucket) action: drop -# coredns -- job_name: eks-sample-prometheus/coredns-eks-sample/0 - honor_labels: true - scheme: https - kubernetes_sd_configs: - - role: endpoints - namespaces: - names: - - kube-system - api_server: https://master.eks-sample:443 - tls_config: - ca_file: /etc/prometheus/secrets/cluster-certificates/ca - cert_file: /etc/prometheus/secrets/cluster-certificates/crt - key_file: /etc/prometheus/secrets/cluster-certificates/key - insecure_skip_verify: false - tls_config: - ca_file: /etc/prometheus/secrets/cluster-certificates/ca - cert_file: /etc/prometheus/secrets/cluster-certificates/crt - key_file: /etc/prometheus/secrets/cluster-certificates/key - insecure_skip_verify: true - relabel_configs: - - source_labels: [__address__] - target_label: instance - - source_labels: [__meta_kubernetes_pod_container_name] - regex: coredns - action: keep - - target_label: __address__ - replacement: master.eks-sample:443 - - source_labels: [__meta_kubernetes_pod_name] - regex: (coredns.*) - target_label: __metrics_path__ - replacement: /api/v1/namespaces/kube-system/pods/${1}:9153/proxy/metrics - - source_labels: [__meta_kubernetes_pod_container_name] - target_label: app - - source_labels: [__meta_kubernetes_service_annotationpresent_giantswarm_io_monitoring, __meta_kubernetes_service_labelpresent_giantswarm_io_monitoring] - regex: .*(true).* - action: drop - # Add namespace label. - - source_labels: [__meta_kubernetes_namespace] - target_label: namespace - # Add pod label. - - source_labels: [__meta_kubernetes_pod_name] - target_label: pod - # Add container label. - - source_labels: [__meta_kubernetes_pod_container_name] - target_label: container - # Add node label. - - source_labels: [__meta_kubernetes_pod_node_name] - target_label: node - # Add role label. - - source_labels: [__meta_kubernetes_node_label_role] - target_label: role - # Add cluster_id label. - - target_label: cluster_id - replacement: eks-sample - # Add cluster_type label. - - target_label: cluster_type - replacement: workload_cluster - # Add provider label. - - target_label: provider - replacement: azure - # Add installation label. - - target_label: installation - replacement: test-installation - # Add priority label. - - target_label: service_priority - replacement: highest - # Add organization label. - - target_label: organization - replacement: my-organization - # Add customer label. - - target_label: customer - replacement: pmo - metric_relabel_configs: - # drop unused coredns metrics with the highest cardinality as they increase Prometheus memory usage - - source_labels: [__name__] - regex: coredns_dns_(response_size_bytes_bucket|request_size_bytes_bucket) - action: drop # cert-exporter - job_name: eks-sample-prometheus/cert-exporter-eks-sample/0 honor_labels: true @@ -582,66 +272,6 @@ # Add customer label. - target_label: customer replacement: pmo -# node-exporter -- job_name: eks-sample-prometheus/node-exporter-eks-sample/0 - honor_labels: true - scheme: https - kubernetes_sd_configs: - - role: pod - api_server: https://master.eks-sample:443 - tls_config: - ca_file: /etc/prometheus/secrets/cluster-certificates/ca - cert_file: /etc/prometheus/secrets/cluster-certificates/crt - key_file: /etc/prometheus/secrets/cluster-certificates/key - insecure_skip_verify: false - tls_config: - ca_file: /etc/prometheus/secrets/cluster-certificates/ca - cert_file: /etc/prometheus/secrets/cluster-certificates/crt - key_file: /etc/prometheus/secrets/cluster-certificates/key - insecure_skip_verify: true - relabel_configs: - - target_label: __address__ - replacement: master.eks-sample:443 - - source_labels: [__meta_kubernetes_pod_name] - regex: (node-exporter.*) - target_label: __metrics_path__ - replacement: /api/v1/namespaces/kube-system/pods/${1}:10300/proxy/metrics - - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_pod_name] - regex: kube-system;node-exporter.* - action: keep - - source_labels: [__meta_kubernetes_pod_container_name] - target_label: app - - source_labels: [__meta_kubernetes_pod_annotationpresent_giantswarm_io_monitoring, __meta_kubernetes_pod_labelpresent_giantswarm_io_monitoring] - regex: .*(true).* - action: drop - - source_labels: [__meta_kubernetes_pod_node_name] - target_label: node - # Add cluster_id label. - - target_label: cluster_id - replacement: eks-sample - # Add cluster_type label. - - target_label: cluster_type - replacement: workload_cluster - # Add provider label. - - target_label: provider - replacement: azure - # Add installation label. - - target_label: installation - replacement: test-installation - # Add priority label. - - target_label: service_priority - replacement: highest - # Add organization label. - - target_label: organization - replacement: my-organization - # Add customer label. - - target_label: customer - replacement: pmo - metric_relabel_configs: - # drop unused metrics with the highest cardinality as they increase Prometheus memory usage - - source_labels: [__name__] - regex: node_(filesystem_files|filesystem_readonly|nfs_requests_total|network_carrier|network_transmit_colls_total|network_carrier_changes_total|network_transmit_packets_total|network_carrier_down_changes_total|network_carrier_up_changes_total|network_iface_id|xfs_.+|ethtool_.+) - action: drop - job_name: eks-sample-prometheus/workload-eks-sample/0 honor_labels: true scheme: https @@ -737,6 +367,15 @@ - target_label: customer replacement: pmo metric_relabel_configs: + - source_labels: [container] + regex: prometheus-operator-app + action: drop + - source_labels: [app] + regex: coredns + action: drop + - source_labels: [app] + regex: kube-state-metrics + action: drop # drop unused nginx metrics with the highest cardinality as they increase Prometheus memory usage - source_labels: [__name__] regex: nginx_ingress_controller_(bytes_sent_bucket|request_size_bucket|response_duration_seconds_bucket|response_size_bucket|request_duration_seconds_count|connect_duration_seconds_bucket|header_duration_seconds_bucket|bytes_sent_count|request_duration_seconds_sum|bytes_sent_sum|request_size_count|response_size_count|response_duration_seconds_sum|response_duration_seconds_count|ingress_upstream_latency_seconds|ingress_upstream_latency_seconds_sum|ingress_upstream_latency_seconds_count) diff --git a/service/controller/resource/monitoring/scrapeconfigs/test/capa/case-6-cluster-api-eks.golden b/service/controller/resource/monitoring/scrapeconfigs/test/capa/case-6-cluster-api-eks.golden index 20c7ec7a6..571aa6899 100644 --- a/service/controller/resource/monitoring/scrapeconfigs/test/capa/case-6-cluster-api-eks.golden +++ b/service/controller/resource/monitoring/scrapeconfigs/test/capa/case-6-cluster-api-eks.golden @@ -1,178 +1,4 @@ -- job_name: eks-sample-prometheus/kubernetes-apiserver-eks-sample/0 - honor_labels: true - scheme: https - kubernetes_sd_configs: - - role: endpoints - api_server: https://master.eks-sample:443 - tls_config: - ca_file: /etc/prometheus/secrets/cluster-certificates/ca - cert_file: /etc/prometheus/secrets/cluster-certificates/crt - key_file: /etc/prometheus/secrets/cluster-certificates/key - insecure_skip_verify: false - tls_config: - ca_file: /etc/prometheus/secrets/cluster-certificates/ca - cert_file: /etc/prometheus/secrets/cluster-certificates/crt - key_file: /etc/prometheus/secrets/cluster-certificates/key - insecure_skip_verify: true - relabel_configs: - - source_labels: [__meta_kubernetes_service_label_component] - regex: apiserver - action: keep - - source_labels: [__meta_kubernetes_endpoint_port_name] - regex: https - action: keep - - target_label: app - replacement: kubernetes - # Add cluster_id label. - - target_label: cluster_id - replacement: eks-sample - # Add cluster_type label. - - target_label: cluster_type - replacement: workload_cluster - # Add provider label. - - target_label: provider - replacement: capa - # Add installation label. - - target_label: installation - replacement: test-installation - # Add priority label. - - target_label: service_priority - replacement: highest - # Add organization label. - - target_label: organization - replacement: my-organization - # Add customer label. - - target_label: customer - replacement: pmo - # Add role label. - - source_labels: [__meta_kubernetes_node_label_role] - target_label: role -# Add kubelet configuration -- job_name: eks-sample-prometheus/kubelet-eks-sample/0 - honor_labels: true - scheme: https - kubernetes_sd_configs: - - role: node - api_server: https://master.eks-sample:443 - tls_config: - ca_file: /etc/prometheus/secrets/cluster-certificates/ca - cert_file: /etc/prometheus/secrets/cluster-certificates/crt - key_file: /etc/prometheus/secrets/cluster-certificates/key - insecure_skip_verify: false - tls_config: - ca_file: /etc/prometheus/secrets/cluster-certificates/ca - cert_file: /etc/prometheus/secrets/cluster-certificates/crt - key_file: /etc/prometheus/secrets/cluster-certificates/key - insecure_skip_verify: true - relabel_configs: - - target_label: app - replacement: kubelet - - source_labels: [__address__] - target_label: instance - - target_label: __address__ - replacement: master.eks-sample:443 - - source_labels: [__meta_kubernetes_node_name] - target_label: __metrics_path__ - replacement: /api/v1/nodes/${1}:10250/proxy/metrics - - source_labels: [__meta_kubernetes_node_name] - target_label: node - # Add cluster_id label. - - target_label: cluster_id - replacement: eks-sample - # Add cluster_type label. - - target_label: cluster_type - replacement: workload_cluster - # Add provider label. - - target_label: provider - replacement: capa - # Add installation label. - - target_label: installation - replacement: test-installation - # Add priority label. - - target_label: service_priority - replacement: highest - # Add organization label. - - target_label: organization - replacement: my-organization - # Add customer label. - - target_label: customer - replacement: pmo - # Add role label. - - source_labels: [__meta_kubernetes_node_label_role] - target_label: role - metric_relabel_configs: - # drop unused rest client metrics - - source_labels: [__name__] - regex: rest_client_(rate_limiter_duration_seconds_bucket|request_size_bytes_bucket|response_size_bytes_bucket) - action: drop - # drop uid label from kubelet - - action: labeldrop - regex: uid -# Add scrape configuration for cadvisor -- job_name: eks-sample-prometheus/cadvisor-eks-sample/0 - honor_labels: true - scheme: https - kubernetes_sd_configs: - - role: node - api_server: https://master.eks-sample:443 - tls_config: - ca_file: /etc/prometheus/secrets/cluster-certificates/ca - cert_file: /etc/prometheus/secrets/cluster-certificates/crt - key_file: /etc/prometheus/secrets/cluster-certificates/key - insecure_skip_verify: false - tls_config: - ca_file: /etc/prometheus/secrets/cluster-certificates/ca - cert_file: /etc/prometheus/secrets/cluster-certificates/crt - key_file: /etc/prometheus/secrets/cluster-certificates/key - insecure_skip_verify: false - relabel_configs: - - source_labels: [__address__] - target_label: instance - - target_label: __address__ - replacement: master.eks-sample:443 - - source_labels: [__meta_kubernetes_node_name] - target_label: __metrics_path__ - replacement: /api/v1/nodes/${1}:10250/proxy/metrics/cadvisor - - target_label: app - replacement: cadvisor - # Add node name. - - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname] - target_label: node - # Add cluster_id label. - - target_label: cluster_id - replacement: eks-sample - # Add cluster_type label. - - target_label: cluster_type - replacement: workload_cluster - # Add provider label. - - target_label: provider - replacement: capa - # Add installation label. - - target_label: installation - replacement: test-installation - # Add priority label. - - target_label: service_priority - replacement: highest - # Add organization label. - - target_label: organization - replacement: my-organization - # Add customer label. - - target_label: customer - replacement: pmo - # Add role label. - - source_labels: [__meta_kubernetes_node_label_role] - target_label: role - metric_relabel_configs: - # drop id and name labels from cAdvisor as they do not provide value but use a lot of RAM - - action: labeldrop - regex: id|name - # dropping explained here https://github.com/giantswarm/giantswarm/issues/26361 - - source_labels: [__name__] - regex: container_(blkio_device_usage_total|network_transmit_errors_total|network_receive_errors_total|tasks_state|memory_failures_total|memory_max_usage_bytes|cpu_load_average_10s|memory_failcnt|cpu_system_seconds_total) - action: drop - - source_labels: [namespace] - regex: (kube-system|giantswarm.*|kong.*|kyverno) - action: keep + # calico-node - job_name: eks-sample-prometheus/calico-node-eks-sample/0 honor_labels: true @@ -241,239 +67,6 @@ # Add customer label. - target_label: customer replacement: pmo -# Add etcd configuration -- job_name: eks-sample-prometheus/etcd-eks-sample/0 - honor_labels: true - scheme: https - kubernetes_sd_configs: - - role: pod - namespaces: - names: - - kube-system - api_server: https://master.eks-sample:443 - tls_config: - ca_file: /etc/prometheus/secrets/cluster-certificates/ca - cert_file: /etc/prometheus/secrets/cluster-certificates/crt - key_file: /etc/prometheus/secrets/cluster-certificates/key - insecure_skip_verify: false - tls_config: - ca_file: /etc/prometheus/secrets/cluster-certificates/ca - cert_file: /etc/prometheus/secrets/cluster-certificates/crt - key_file: /etc/prometheus/secrets/cluster-certificates/key - insecure_skip_verify: true - relabel_configs: - - source_labels: [__meta_kubernetes_pod_container_name] - regex: (etcd) - action: keep - - target_label: __address__ - replacement: master.eks-sample:443 - - source_labels: [__meta_kubernetes_pod_name] - target_label: __metrics_path__ - replacement: /api/v1/namespaces/kube-system/pods/${1}:2381/proxy/metrics - action: replace - - source_labels: [ __meta_kubernetes_pod_name ] - target_label: pod_name - - target_label: app - replacement: etcd - - source_labels: [__address__] - target_label: instance - # Add ip label. - - target_label: ip - source_labels: [__meta_kubernetes_node_address_InternalIP] - # Add cluster_id label. - - target_label: cluster_id - replacement: eks-sample - # Add cluster_type label. - - target_label: cluster_type - replacement: workload_cluster - # Add provider label. - - target_label: provider - replacement: capa - # Add installation label. - - target_label: installation - replacement: test-installation - # Add priority label. - - target_label: service_priority - replacement: highest - # Add organization label. - - target_label: organization - replacement: my-organization - # Add customer label. - - target_label: customer - replacement: pmo - # Add role label. - - source_labels: [__meta_kubernetes_node_label_role] - target_label: role -# kube-controller-manager -- job_name: eks-sample-prometheus/kubernetes-controller-manager-eks-sample/0 - honor_labels: true - scheme: https - kubernetes_sd_configs: - - role: pod - namespaces: - names: - - kube-system - api_server: https://master.eks-sample:443 - tls_config: - ca_file: /etc/prometheus/secrets/cluster-certificates/ca - cert_file: /etc/prometheus/secrets/cluster-certificates/crt - key_file: /etc/prometheus/secrets/cluster-certificates/key - insecure_skip_verify: false - tls_config: - ca_file: /etc/prometheus/secrets/cluster-certificates/ca - cert_file: /etc/prometheus/secrets/cluster-certificates/crt - key_file: /etc/prometheus/secrets/cluster-certificates/key - insecure_skip_verify: true - relabel_configs: - - source_labels: [__address__] - replacement: 10257 - target_label: __tmp_port - - source_labels: [__meta_kubernetes_pod_annotationpresent_giantswarm_io_monitoring_port,__meta_kubernetes_pod_annotation_giantswarm_io_monitoring_port] - action: replace - regex: true;(\d+) - replacement: $1 - target_label: __tmp_port - - source_labels: [__address__, __tmp_port] - target_label: instance - regex: (.+);(.+) - replacement: $1:$2 - - source_labels: [__meta_kubernetes_pod_container_name] - regex: (k8s-controller-manager|kube-controller-manager) - action: keep - - target_label: __address__ - replacement: master.eks-sample:443 - - source_labels: [__meta_kubernetes_pod_name, __tmp_port] - target_label: __metrics_path__ - regex: (.+);(\d+) - replacement: /api/v1/namespaces/kube-system/pods/https:${1}:${2}/proxy/metrics - - target_label: app - replacement: kube-controller-manager - # Add namespace label. - - source_labels: [__meta_kubernetes_namespace] - target_label: namespace - # Add pod label. - - source_labels: [__meta_kubernetes_pod_name] - target_label: pod - # Add container label. - - source_labels: [__meta_kubernetes_pod_container_name] - target_label: container - # Add node label. - - source_labels: [__meta_kubernetes_pod_node_name] - target_label: node - # Add role label. - - source_labels: [__meta_kubernetes_node_label_role] - target_label: role - # Add cluster_id label. - - target_label: cluster_id - replacement: eks-sample - # Add cluster_type label. - - target_label: cluster_type - replacement: workload_cluster - # Add provider label. - - target_label: provider - replacement: capa - # Add installation label. - - target_label: installation - replacement: test-installation - # Add priority label. - - target_label: service_priority - replacement: highest - # Add organization label. - - target_label: organization - replacement: my-organization - # Add customer label. - - target_label: customer - replacement: pmo - metric_relabel_configs: - # drop unused rest client metrics - - source_labels: [__name__] - regex: rest_client_(rate_limiter_duration_seconds_bucket|request_size_bytes_bucket|response_size_bytes_bucket) - action: drop -# kube-scheduler -- job_name: eks-sample-prometheus/kubernetes-scheduler-eks-sample/0 - honor_labels: true - scheme: https - kubernetes_sd_configs: - - role: pod - namespaces: - names: - - kube-system - api_server: https://master.eks-sample:443 - tls_config: - ca_file: /etc/prometheus/secrets/cluster-certificates/ca - cert_file: /etc/prometheus/secrets/cluster-certificates/crt - key_file: /etc/prometheus/secrets/cluster-certificates/key - insecure_skip_verify: false - tls_config: - ca_file: /etc/prometheus/secrets/cluster-certificates/ca - cert_file: /etc/prometheus/secrets/cluster-certificates/crt - key_file: /etc/prometheus/secrets/cluster-certificates/key - insecure_skip_verify: true - relabel_configs: - - source_labels: [__address__] - replacement: 10259 - target_label: __tmp_port - - source_labels: [__meta_kubernetes_pod_annotationpresent_giantswarm_io_monitoring_port,__meta_kubernetes_pod_annotation_giantswarm_io_monitoring_port] - action: replace - regex: true;(\d+) - replacement: $1 - target_label: __tmp_port - - source_labels: [__address__, __tmp_port] - target_label: instance - regex: (.+);(.+) - replacement: $1:$2 - - source_labels: [__meta_kubernetes_pod_container_name] - regex: (k8s-scheduler|kube-scheduler) - action: keep - - target_label: __address__ - replacement: master.eks-sample:443 - - source_labels: [__meta_kubernetes_pod_name, __tmp_port] - target_label: __metrics_path__ - regex: (.+);(\d+) - replacement: /api/v1/namespaces/kube-system/pods/https:${1}:${2}/proxy/metrics - - target_label: app - replacement: kube-scheduler - # Add namespace label. - - source_labels: [__meta_kubernetes_namespace] - target_label: namespace - # Add pod label. - - source_labels: [__meta_kubernetes_pod_name] - target_label: pod - # Add container label. - - source_labels: [__meta_kubernetes_pod_container_name] - target_label: container - # Add node label. - - source_labels: [__meta_kubernetes_pod_node_name] - target_label: node - # Add role label. - - source_labels: [__meta_kubernetes_node_label_role] - target_label: role - # Add cluster_id label. - - target_label: cluster_id - replacement: eks-sample - # Add cluster_type label. - - target_label: cluster_type - replacement: workload_cluster - # Add provider label. - - target_label: provider - replacement: capa - # Add installation label. - - target_label: installation - replacement: test-installation - # Add priority label. - - target_label: service_priority - replacement: highest - # Add organization label. - - target_label: organization - replacement: my-organization - # Add customer label. - - target_label: customer - replacement: pmo - metric_relabel_configs: - # drop unused rest client metrics - - source_labels: [__name__] - regex: rest_client_(rate_limiter_duration_seconds_bucket|request_size_bytes_bucket|response_size_bytes_bucket) - action: drop # kube-proxy - job_name: eks-sample-prometheus/kube-proxy-eks-sample/0 honor_labels: true @@ -550,84 +143,6 @@ - source_labels: [__name__] regex: rest_client_(rate_limiter_duration_seconds_bucket|request_size_bytes_bucket|response_size_bytes_bucket) action: drop -# coredns -- job_name: eks-sample-prometheus/coredns-eks-sample/0 - honor_labels: true - scheme: https - kubernetes_sd_configs: - - role: endpoints - namespaces: - names: - - kube-system - api_server: https://master.eks-sample:443 - tls_config: - ca_file: /etc/prometheus/secrets/cluster-certificates/ca - cert_file: /etc/prometheus/secrets/cluster-certificates/crt - key_file: /etc/prometheus/secrets/cluster-certificates/key - insecure_skip_verify: false - tls_config: - ca_file: /etc/prometheus/secrets/cluster-certificates/ca - cert_file: /etc/prometheus/secrets/cluster-certificates/crt - key_file: /etc/prometheus/secrets/cluster-certificates/key - insecure_skip_verify: true - relabel_configs: - - source_labels: [__address__] - target_label: instance - - source_labels: [__meta_kubernetes_pod_container_name] - regex: coredns - action: keep - - target_label: __address__ - replacement: master.eks-sample:443 - - source_labels: [__meta_kubernetes_pod_name] - regex: (coredns.*) - target_label: __metrics_path__ - replacement: /api/v1/namespaces/kube-system/pods/${1}:9153/proxy/metrics - - source_labels: [__meta_kubernetes_pod_container_name] - target_label: app - - source_labels: [__meta_kubernetes_service_annotationpresent_giantswarm_io_monitoring, __meta_kubernetes_service_labelpresent_giantswarm_io_monitoring] - regex: .*(true).* - action: drop - # Add namespace label. - - source_labels: [__meta_kubernetes_namespace] - target_label: namespace - # Add pod label. - - source_labels: [__meta_kubernetes_pod_name] - target_label: pod - # Add container label. - - source_labels: [__meta_kubernetes_pod_container_name] - target_label: container - # Add node label. - - source_labels: [__meta_kubernetes_pod_node_name] - target_label: node - # Add role label. - - source_labels: [__meta_kubernetes_node_label_role] - target_label: role - # Add cluster_id label. - - target_label: cluster_id - replacement: eks-sample - # Add cluster_type label. - - target_label: cluster_type - replacement: workload_cluster - # Add provider label. - - target_label: provider - replacement: capa - # Add installation label. - - target_label: installation - replacement: test-installation - # Add priority label. - - target_label: service_priority - replacement: highest - # Add organization label. - - target_label: organization - replacement: my-organization - # Add customer label. - - target_label: customer - replacement: pmo - metric_relabel_configs: - # drop unused coredns metrics with the highest cardinality as they increase Prometheus memory usage - - source_labels: [__name__] - regex: coredns_dns_(response_size_bytes_bucket|request_size_bytes_bucket) - action: drop # cert-exporter - job_name: eks-sample-prometheus/cert-exporter-eks-sample/0 honor_labels: true @@ -701,66 +216,6 @@ # Add customer label. - target_label: customer replacement: pmo -# node-exporter -- job_name: eks-sample-prometheus/node-exporter-eks-sample/0 - honor_labels: true - scheme: https - kubernetes_sd_configs: - - role: pod - api_server: https://master.eks-sample:443 - tls_config: - ca_file: /etc/prometheus/secrets/cluster-certificates/ca - cert_file: /etc/prometheus/secrets/cluster-certificates/crt - key_file: /etc/prometheus/secrets/cluster-certificates/key - insecure_skip_verify: false - tls_config: - ca_file: /etc/prometheus/secrets/cluster-certificates/ca - cert_file: /etc/prometheus/secrets/cluster-certificates/crt - key_file: /etc/prometheus/secrets/cluster-certificates/key - insecure_skip_verify: true - relabel_configs: - - target_label: __address__ - replacement: master.eks-sample:443 - - source_labels: [__meta_kubernetes_pod_name] - regex: (node-exporter.*) - target_label: __metrics_path__ - replacement: /api/v1/namespaces/kube-system/pods/${1}:10300/proxy/metrics - - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_pod_name] - regex: kube-system;node-exporter.* - action: keep - - source_labels: [__meta_kubernetes_pod_container_name] - target_label: app - - source_labels: [__meta_kubernetes_pod_annotationpresent_giantswarm_io_monitoring, __meta_kubernetes_pod_labelpresent_giantswarm_io_monitoring] - regex: .*(true).* - action: drop - - source_labels: [__meta_kubernetes_pod_node_name] - target_label: node - # Add cluster_id label. - - target_label: cluster_id - replacement: eks-sample - # Add cluster_type label. - - target_label: cluster_type - replacement: workload_cluster - # Add provider label. - - target_label: provider - replacement: capa - # Add installation label. - - target_label: installation - replacement: test-installation - # Add priority label. - - target_label: service_priority - replacement: highest - # Add organization label. - - target_label: organization - replacement: my-organization - # Add customer label. - - target_label: customer - replacement: pmo - metric_relabel_configs: - # drop unused metrics with the highest cardinality as they increase Prometheus memory usage - - source_labels: [__name__] - regex: node_(filesystem_files|filesystem_readonly|nfs_requests_total|network_carrier|network_transmit_colls_total|network_carrier_changes_total|network_transmit_packets_total|network_carrier_down_changes_total|network_carrier_up_changes_total|network_iface_id|xfs_.+|ethtool_.+) - action: drop - job_name: eks-sample-prometheus/workload-eks-sample/0 honor_labels: true scheme: https @@ -856,6 +311,15 @@ - target_label: customer replacement: pmo metric_relabel_configs: + - source_labels: [container] + regex: prometheus-operator-app + action: drop + - source_labels: [app] + regex: coredns + action: drop + - source_labels: [app] + regex: kube-state-metrics + action: drop # drop unused nginx metrics with the highest cardinality as they increase Prometheus memory usage - source_labels: [__name__] regex: nginx_ingress_controller_(bytes_sent_bucket|request_size_bucket|response_duration_seconds_bucket|response_size_bucket|request_duration_seconds_count|connect_duration_seconds_bucket|header_duration_seconds_bucket|bytes_sent_count|request_duration_seconds_sum|bytes_sent_sum|request_size_count|response_size_count|response_duration_seconds_sum|response_duration_seconds_count|ingress_upstream_latency_seconds|ingress_upstream_latency_seconds_sum|ingress_upstream_latency_seconds_count) diff --git a/service/controller/resource/monitoring/scrapeconfigs/test/gcp/case-6-cluster-api-eks.golden b/service/controller/resource/monitoring/scrapeconfigs/test/gcp/case-6-cluster-api-eks.golden index 812bb2adb..b6a2fd2c8 100644 --- a/service/controller/resource/monitoring/scrapeconfigs/test/gcp/case-6-cluster-api-eks.golden +++ b/service/controller/resource/monitoring/scrapeconfigs/test/gcp/case-6-cluster-api-eks.golden @@ -1,178 +1,4 @@ -- job_name: eks-sample-prometheus/kubernetes-apiserver-eks-sample/0 - honor_labels: true - scheme: https - kubernetes_sd_configs: - - role: endpoints - api_server: https://master.eks-sample:443 - tls_config: - ca_file: /etc/prometheus/secrets/cluster-certificates/ca - cert_file: /etc/prometheus/secrets/cluster-certificates/crt - key_file: /etc/prometheus/secrets/cluster-certificates/key - insecure_skip_verify: false - tls_config: - ca_file: /etc/prometheus/secrets/cluster-certificates/ca - cert_file: /etc/prometheus/secrets/cluster-certificates/crt - key_file: /etc/prometheus/secrets/cluster-certificates/key - insecure_skip_verify: true - relabel_configs: - - source_labels: [__meta_kubernetes_service_label_component] - regex: apiserver - action: keep - - source_labels: [__meta_kubernetes_endpoint_port_name] - regex: https - action: keep - - target_label: app - replacement: kubernetes - # Add cluster_id label. - - target_label: cluster_id - replacement: eks-sample - # Add cluster_type label. - - target_label: cluster_type - replacement: workload_cluster - # Add provider label. - - target_label: provider - replacement: gcp - # Add installation label. - - target_label: installation - replacement: test-installation - # Add priority label. - - target_label: service_priority - replacement: highest - # Add organization label. - - target_label: organization - replacement: my-organization - # Add customer label. - - target_label: customer - replacement: pmo - # Add role label. - - source_labels: [__meta_kubernetes_node_label_role] - target_label: role -# Add kubelet configuration -- job_name: eks-sample-prometheus/kubelet-eks-sample/0 - honor_labels: true - scheme: https - kubernetes_sd_configs: - - role: node - api_server: https://master.eks-sample:443 - tls_config: - ca_file: /etc/prometheus/secrets/cluster-certificates/ca - cert_file: /etc/prometheus/secrets/cluster-certificates/crt - key_file: /etc/prometheus/secrets/cluster-certificates/key - insecure_skip_verify: false - tls_config: - ca_file: /etc/prometheus/secrets/cluster-certificates/ca - cert_file: /etc/prometheus/secrets/cluster-certificates/crt - key_file: /etc/prometheus/secrets/cluster-certificates/key - insecure_skip_verify: true - relabel_configs: - - target_label: app - replacement: kubelet - - source_labels: [__address__] - target_label: instance - - target_label: __address__ - replacement: master.eks-sample:443 - - source_labels: [__meta_kubernetes_node_name] - target_label: __metrics_path__ - replacement: /api/v1/nodes/${1}:10250/proxy/metrics - - source_labels: [__meta_kubernetes_node_name] - target_label: node - # Add cluster_id label. - - target_label: cluster_id - replacement: eks-sample - # Add cluster_type label. - - target_label: cluster_type - replacement: workload_cluster - # Add provider label. - - target_label: provider - replacement: gcp - # Add installation label. - - target_label: installation - replacement: test-installation - # Add priority label. - - target_label: service_priority - replacement: highest - # Add organization label. - - target_label: organization - replacement: my-organization - # Add customer label. - - target_label: customer - replacement: pmo - # Add role label. - - source_labels: [__meta_kubernetes_node_label_role] - target_label: role - metric_relabel_configs: - # drop unused rest client metrics - - source_labels: [__name__] - regex: rest_client_(rate_limiter_duration_seconds_bucket|request_size_bytes_bucket|response_size_bytes_bucket) - action: drop - # drop uid label from kubelet - - action: labeldrop - regex: uid -# Add scrape configuration for cadvisor -- job_name: eks-sample-prometheus/cadvisor-eks-sample/0 - honor_labels: true - scheme: https - kubernetes_sd_configs: - - role: node - api_server: https://master.eks-sample:443 - tls_config: - ca_file: /etc/prometheus/secrets/cluster-certificates/ca - cert_file: /etc/prometheus/secrets/cluster-certificates/crt - key_file: /etc/prometheus/secrets/cluster-certificates/key - insecure_skip_verify: false - tls_config: - ca_file: /etc/prometheus/secrets/cluster-certificates/ca - cert_file: /etc/prometheus/secrets/cluster-certificates/crt - key_file: /etc/prometheus/secrets/cluster-certificates/key - insecure_skip_verify: false - relabel_configs: - - source_labels: [__address__] - target_label: instance - - target_label: __address__ - replacement: master.eks-sample:443 - - source_labels: [__meta_kubernetes_node_name] - target_label: __metrics_path__ - replacement: /api/v1/nodes/${1}:10250/proxy/metrics/cadvisor - - target_label: app - replacement: cadvisor - # Add node name. - - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname] - target_label: node - # Add cluster_id label. - - target_label: cluster_id - replacement: eks-sample - # Add cluster_type label. - - target_label: cluster_type - replacement: workload_cluster - # Add provider label. - - target_label: provider - replacement: gcp - # Add installation label. - - target_label: installation - replacement: test-installation - # Add priority label. - - target_label: service_priority - replacement: highest - # Add organization label. - - target_label: organization - replacement: my-organization - # Add customer label. - - target_label: customer - replacement: pmo - # Add role label. - - source_labels: [__meta_kubernetes_node_label_role] - target_label: role - metric_relabel_configs: - # drop id and name labels from cAdvisor as they do not provide value but use a lot of RAM - - action: labeldrop - regex: id|name - # dropping explained here https://github.com/giantswarm/giantswarm/issues/26361 - - source_labels: [__name__] - regex: container_(blkio_device_usage_total|network_transmit_errors_total|network_receive_errors_total|tasks_state|memory_failures_total|memory_max_usage_bytes|cpu_load_average_10s|memory_failcnt|cpu_system_seconds_total) - action: drop - - source_labels: [namespace] - regex: (kube-system|giantswarm.*|kong.*|kyverno) - action: keep + # calico-node - job_name: eks-sample-prometheus/calico-node-eks-sample/0 honor_labels: true @@ -241,239 +67,6 @@ # Add customer label. - target_label: customer replacement: pmo -# Add etcd configuration -- job_name: eks-sample-prometheus/etcd-eks-sample/0 - honor_labels: true - scheme: https - kubernetes_sd_configs: - - role: pod - namespaces: - names: - - kube-system - api_server: https://master.eks-sample:443 - tls_config: - ca_file: /etc/prometheus/secrets/cluster-certificates/ca - cert_file: /etc/prometheus/secrets/cluster-certificates/crt - key_file: /etc/prometheus/secrets/cluster-certificates/key - insecure_skip_verify: false - tls_config: - ca_file: /etc/prometheus/secrets/cluster-certificates/ca - cert_file: /etc/prometheus/secrets/cluster-certificates/crt - key_file: /etc/prometheus/secrets/cluster-certificates/key - insecure_skip_verify: true - relabel_configs: - - source_labels: [__meta_kubernetes_pod_container_name] - regex: (etcd) - action: keep - - target_label: __address__ - replacement: master.eks-sample:443 - - source_labels: [__meta_kubernetes_pod_name] - target_label: __metrics_path__ - replacement: /api/v1/namespaces/kube-system/pods/${1}:2381/proxy/metrics - action: replace - - source_labels: [ __meta_kubernetes_pod_name ] - target_label: pod_name - - target_label: app - replacement: etcd - - source_labels: [__address__] - target_label: instance - # Add ip label. - - target_label: ip - source_labels: [__meta_kubernetes_node_address_InternalIP] - # Add cluster_id label. - - target_label: cluster_id - replacement: eks-sample - # Add cluster_type label. - - target_label: cluster_type - replacement: workload_cluster - # Add provider label. - - target_label: provider - replacement: gcp - # Add installation label. - - target_label: installation - replacement: test-installation - # Add priority label. - - target_label: service_priority - replacement: highest - # Add organization label. - - target_label: organization - replacement: my-organization - # Add customer label. - - target_label: customer - replacement: pmo - # Add role label. - - source_labels: [__meta_kubernetes_node_label_role] - target_label: role -# kube-controller-manager -- job_name: eks-sample-prometheus/kubernetes-controller-manager-eks-sample/0 - honor_labels: true - scheme: https - kubernetes_sd_configs: - - role: pod - namespaces: - names: - - kube-system - api_server: https://master.eks-sample:443 - tls_config: - ca_file: /etc/prometheus/secrets/cluster-certificates/ca - cert_file: /etc/prometheus/secrets/cluster-certificates/crt - key_file: /etc/prometheus/secrets/cluster-certificates/key - insecure_skip_verify: false - tls_config: - ca_file: /etc/prometheus/secrets/cluster-certificates/ca - cert_file: /etc/prometheus/secrets/cluster-certificates/crt - key_file: /etc/prometheus/secrets/cluster-certificates/key - insecure_skip_verify: true - relabel_configs: - - source_labels: [__address__] - replacement: 10257 - target_label: __tmp_port - - source_labels: [__meta_kubernetes_pod_annotationpresent_giantswarm_io_monitoring_port,__meta_kubernetes_pod_annotation_giantswarm_io_monitoring_port] - action: replace - regex: true;(\d+) - replacement: $1 - target_label: __tmp_port - - source_labels: [__address__, __tmp_port] - target_label: instance - regex: (.+);(.+) - replacement: $1:$2 - - source_labels: [__meta_kubernetes_pod_container_name] - regex: (k8s-controller-manager|kube-controller-manager) - action: keep - - target_label: __address__ - replacement: master.eks-sample:443 - - source_labels: [__meta_kubernetes_pod_name, __tmp_port] - target_label: __metrics_path__ - regex: (.+);(\d+) - replacement: /api/v1/namespaces/kube-system/pods/https:${1}:${2}/proxy/metrics - - target_label: app - replacement: kube-controller-manager - # Add namespace label. - - source_labels: [__meta_kubernetes_namespace] - target_label: namespace - # Add pod label. - - source_labels: [__meta_kubernetes_pod_name] - target_label: pod - # Add container label. - - source_labels: [__meta_kubernetes_pod_container_name] - target_label: container - # Add node label. - - source_labels: [__meta_kubernetes_pod_node_name] - target_label: node - # Add role label. - - source_labels: [__meta_kubernetes_node_label_role] - target_label: role - # Add cluster_id label. - - target_label: cluster_id - replacement: eks-sample - # Add cluster_type label. - - target_label: cluster_type - replacement: workload_cluster - # Add provider label. - - target_label: provider - replacement: gcp - # Add installation label. - - target_label: installation - replacement: test-installation - # Add priority label. - - target_label: service_priority - replacement: highest - # Add organization label. - - target_label: organization - replacement: my-organization - # Add customer label. - - target_label: customer - replacement: pmo - metric_relabel_configs: - # drop unused rest client metrics - - source_labels: [__name__] - regex: rest_client_(rate_limiter_duration_seconds_bucket|request_size_bytes_bucket|response_size_bytes_bucket) - action: drop -# kube-scheduler -- job_name: eks-sample-prometheus/kubernetes-scheduler-eks-sample/0 - honor_labels: true - scheme: https - kubernetes_sd_configs: - - role: pod - namespaces: - names: - - kube-system - api_server: https://master.eks-sample:443 - tls_config: - ca_file: /etc/prometheus/secrets/cluster-certificates/ca - cert_file: /etc/prometheus/secrets/cluster-certificates/crt - key_file: /etc/prometheus/secrets/cluster-certificates/key - insecure_skip_verify: false - tls_config: - ca_file: /etc/prometheus/secrets/cluster-certificates/ca - cert_file: /etc/prometheus/secrets/cluster-certificates/crt - key_file: /etc/prometheus/secrets/cluster-certificates/key - insecure_skip_verify: true - relabel_configs: - - source_labels: [__address__] - replacement: 10259 - target_label: __tmp_port - - source_labels: [__meta_kubernetes_pod_annotationpresent_giantswarm_io_monitoring_port,__meta_kubernetes_pod_annotation_giantswarm_io_monitoring_port] - action: replace - regex: true;(\d+) - replacement: $1 - target_label: __tmp_port - - source_labels: [__address__, __tmp_port] - target_label: instance - regex: (.+);(.+) - replacement: $1:$2 - - source_labels: [__meta_kubernetes_pod_container_name] - regex: (k8s-scheduler|kube-scheduler) - action: keep - - target_label: __address__ - replacement: master.eks-sample:443 - - source_labels: [__meta_kubernetes_pod_name, __tmp_port] - target_label: __metrics_path__ - regex: (.+);(\d+) - replacement: /api/v1/namespaces/kube-system/pods/https:${1}:${2}/proxy/metrics - - target_label: app - replacement: kube-scheduler - # Add namespace label. - - source_labels: [__meta_kubernetes_namespace] - target_label: namespace - # Add pod label. - - source_labels: [__meta_kubernetes_pod_name] - target_label: pod - # Add container label. - - source_labels: [__meta_kubernetes_pod_container_name] - target_label: container - # Add node label. - - source_labels: [__meta_kubernetes_pod_node_name] - target_label: node - # Add role label. - - source_labels: [__meta_kubernetes_node_label_role] - target_label: role - # Add cluster_id label. - - target_label: cluster_id - replacement: eks-sample - # Add cluster_type label. - - target_label: cluster_type - replacement: workload_cluster - # Add provider label. - - target_label: provider - replacement: gcp - # Add installation label. - - target_label: installation - replacement: test-installation - # Add priority label. - - target_label: service_priority - replacement: highest - # Add organization label. - - target_label: organization - replacement: my-organization - # Add customer label. - - target_label: customer - replacement: pmo - metric_relabel_configs: - # drop unused rest client metrics - - source_labels: [__name__] - regex: rest_client_(rate_limiter_duration_seconds_bucket|request_size_bytes_bucket|response_size_bytes_bucket) - action: drop # kube-proxy - job_name: eks-sample-prometheus/kube-proxy-eks-sample/0 honor_labels: true @@ -550,84 +143,6 @@ - source_labels: [__name__] regex: rest_client_(rate_limiter_duration_seconds_bucket|request_size_bytes_bucket|response_size_bytes_bucket) action: drop -# coredns -- job_name: eks-sample-prometheus/coredns-eks-sample/0 - honor_labels: true - scheme: https - kubernetes_sd_configs: - - role: endpoints - namespaces: - names: - - kube-system - api_server: https://master.eks-sample:443 - tls_config: - ca_file: /etc/prometheus/secrets/cluster-certificates/ca - cert_file: /etc/prometheus/secrets/cluster-certificates/crt - key_file: /etc/prometheus/secrets/cluster-certificates/key - insecure_skip_verify: false - tls_config: - ca_file: /etc/prometheus/secrets/cluster-certificates/ca - cert_file: /etc/prometheus/secrets/cluster-certificates/crt - key_file: /etc/prometheus/secrets/cluster-certificates/key - insecure_skip_verify: true - relabel_configs: - - source_labels: [__address__] - target_label: instance - - source_labels: [__meta_kubernetes_pod_container_name] - regex: coredns - action: keep - - target_label: __address__ - replacement: master.eks-sample:443 - - source_labels: [__meta_kubernetes_pod_name] - regex: (coredns.*) - target_label: __metrics_path__ - replacement: /api/v1/namespaces/kube-system/pods/${1}:9153/proxy/metrics - - source_labels: [__meta_kubernetes_pod_container_name] - target_label: app - - source_labels: [__meta_kubernetes_service_annotationpresent_giantswarm_io_monitoring, __meta_kubernetes_service_labelpresent_giantswarm_io_monitoring] - regex: .*(true).* - action: drop - # Add namespace label. - - source_labels: [__meta_kubernetes_namespace] - target_label: namespace - # Add pod label. - - source_labels: [__meta_kubernetes_pod_name] - target_label: pod - # Add container label. - - source_labels: [__meta_kubernetes_pod_container_name] - target_label: container - # Add node label. - - source_labels: [__meta_kubernetes_pod_node_name] - target_label: node - # Add role label. - - source_labels: [__meta_kubernetes_node_label_role] - target_label: role - # Add cluster_id label. - - target_label: cluster_id - replacement: eks-sample - # Add cluster_type label. - - target_label: cluster_type - replacement: workload_cluster - # Add provider label. - - target_label: provider - replacement: gcp - # Add installation label. - - target_label: installation - replacement: test-installation - # Add priority label. - - target_label: service_priority - replacement: highest - # Add organization label. - - target_label: organization - replacement: my-organization - # Add customer label. - - target_label: customer - replacement: pmo - metric_relabel_configs: - # drop unused coredns metrics with the highest cardinality as they increase Prometheus memory usage - - source_labels: [__name__] - regex: coredns_dns_(response_size_bytes_bucket|request_size_bytes_bucket) - action: drop # cert-exporter - job_name: eks-sample-prometheus/cert-exporter-eks-sample/0 honor_labels: true @@ -701,66 +216,6 @@ # Add customer label. - target_label: customer replacement: pmo -# node-exporter -- job_name: eks-sample-prometheus/node-exporter-eks-sample/0 - honor_labels: true - scheme: https - kubernetes_sd_configs: - - role: pod - api_server: https://master.eks-sample:443 - tls_config: - ca_file: /etc/prometheus/secrets/cluster-certificates/ca - cert_file: /etc/prometheus/secrets/cluster-certificates/crt - key_file: /etc/prometheus/secrets/cluster-certificates/key - insecure_skip_verify: false - tls_config: - ca_file: /etc/prometheus/secrets/cluster-certificates/ca - cert_file: /etc/prometheus/secrets/cluster-certificates/crt - key_file: /etc/prometheus/secrets/cluster-certificates/key - insecure_skip_verify: true - relabel_configs: - - target_label: __address__ - replacement: master.eks-sample:443 - - source_labels: [__meta_kubernetes_pod_name] - regex: (node-exporter.*) - target_label: __metrics_path__ - replacement: /api/v1/namespaces/kube-system/pods/${1}:10300/proxy/metrics - - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_pod_name] - regex: kube-system;node-exporter.* - action: keep - - source_labels: [__meta_kubernetes_pod_container_name] - target_label: app - - source_labels: [__meta_kubernetes_pod_annotationpresent_giantswarm_io_monitoring, __meta_kubernetes_pod_labelpresent_giantswarm_io_monitoring] - regex: .*(true).* - action: drop - - source_labels: [__meta_kubernetes_pod_node_name] - target_label: node - # Add cluster_id label. - - target_label: cluster_id - replacement: eks-sample - # Add cluster_type label. - - target_label: cluster_type - replacement: workload_cluster - # Add provider label. - - target_label: provider - replacement: gcp - # Add installation label. - - target_label: installation - replacement: test-installation - # Add priority label. - - target_label: service_priority - replacement: highest - # Add organization label. - - target_label: organization - replacement: my-organization - # Add customer label. - - target_label: customer - replacement: pmo - metric_relabel_configs: - # drop unused metrics with the highest cardinality as they increase Prometheus memory usage - - source_labels: [__name__] - regex: node_(filesystem_files|filesystem_readonly|nfs_requests_total|network_carrier|network_transmit_colls_total|network_carrier_changes_total|network_transmit_packets_total|network_carrier_down_changes_total|network_carrier_up_changes_total|network_iface_id|xfs_.+|ethtool_.+) - action: drop - job_name: eks-sample-prometheus/workload-eks-sample/0 honor_labels: true scheme: https @@ -856,6 +311,15 @@ - target_label: customer replacement: pmo metric_relabel_configs: + - source_labels: [container] + regex: prometheus-operator-app + action: drop + - source_labels: [app] + regex: coredns + action: drop + - source_labels: [app] + regex: kube-state-metrics + action: drop # drop unused nginx metrics with the highest cardinality as they increase Prometheus memory usage - source_labels: [__name__] regex: nginx_ingress_controller_(bytes_sent_bucket|request_size_bucket|response_duration_seconds_bucket|response_size_bucket|request_duration_seconds_count|connect_duration_seconds_bucket|header_duration_seconds_bucket|bytes_sent_count|request_duration_seconds_sum|bytes_sent_sum|request_size_count|response_size_count|response_duration_seconds_sum|response_duration_seconds_count|ingress_upstream_latency_seconds|ingress_upstream_latency_seconds_sum|ingress_upstream_latency_seconds_count) diff --git a/service/controller/resource/monitoring/scrapeconfigs/test/kvm/case-6-cluster-api-eks.golden b/service/controller/resource/monitoring/scrapeconfigs/test/kvm/case-6-cluster-api-eks.golden index d0812b3de..8fd41662d 100644 --- a/service/controller/resource/monitoring/scrapeconfigs/test/kvm/case-6-cluster-api-eks.golden +++ b/service/controller/resource/monitoring/scrapeconfigs/test/kvm/case-6-cluster-api-eks.golden @@ -1,52 +1,4 @@ -- job_name: eks-sample-prometheus/kubernetes-apiserver-eks-sample/0 - honor_labels: true - scheme: https - kubernetes_sd_configs: - - role: endpoints - api_server: https://master.eks-sample:443 - tls_config: - ca_file: /etc/prometheus/secrets/cluster-certificates/ca - cert_file: /etc/prometheus/secrets/cluster-certificates/crt - key_file: /etc/prometheus/secrets/cluster-certificates/key - insecure_skip_verify: false - tls_config: - ca_file: /etc/prometheus/secrets/cluster-certificates/ca - cert_file: /etc/prometheus/secrets/cluster-certificates/crt - key_file: /etc/prometheus/secrets/cluster-certificates/key - insecure_skip_verify: true - relabel_configs: - - source_labels: [__meta_kubernetes_service_label_component] - regex: apiserver - action: keep - - source_labels: [__meta_kubernetes_endpoint_port_name] - regex: https - action: keep - - target_label: app - replacement: kubernetes - # Add cluster_id label. - - target_label: cluster_id - replacement: eks-sample - # Add cluster_type label. - - target_label: cluster_type - replacement: workload_cluster - # Add provider label. - - target_label: provider - replacement: kvm - # Add installation label. - - target_label: installation - replacement: test-installation - # Add priority label. - - target_label: service_priority - replacement: highest - # Add organization label. - - target_label: organization - replacement: my-organization - # Add customer label. - - target_label: customer - replacement: pmo - # Add role label. - - source_labels: [__meta_kubernetes_node_label_role] - target_label: role + # Add scrape configuration for docker - job_name: eks-sample-prometheus/docker-eks-sample/0 honor_labels: true @@ -103,132 +55,6 @@ - source_labels: [__name__] regex: (engine_daemon_image_actions_seconds_count|process_virtual_memory_bytes|process_resident_memory_bytes) action: keep -# Add kubelet configuration -- job_name: eks-sample-prometheus/kubelet-eks-sample/0 - honor_labels: true - scheme: https - kubernetes_sd_configs: - - role: node - api_server: https://master.eks-sample:443 - tls_config: - ca_file: /etc/prometheus/secrets/cluster-certificates/ca - cert_file: /etc/prometheus/secrets/cluster-certificates/crt - key_file: /etc/prometheus/secrets/cluster-certificates/key - insecure_skip_verify: false - tls_config: - ca_file: /etc/prometheus/secrets/cluster-certificates/ca - cert_file: /etc/prometheus/secrets/cluster-certificates/crt - key_file: /etc/prometheus/secrets/cluster-certificates/key - insecure_skip_verify: true - relabel_configs: - - target_label: app - replacement: kubelet - - source_labels: [__address__] - target_label: instance - - target_label: __address__ - replacement: master.eks-sample:443 - - source_labels: [__meta_kubernetes_node_name] - target_label: __metrics_path__ - replacement: /api/v1/nodes/${1}:10250/proxy/metrics - - source_labels: [__meta_kubernetes_node_name] - target_label: node - # Add cluster_id label. - - target_label: cluster_id - replacement: eks-sample - # Add cluster_type label. - - target_label: cluster_type - replacement: workload_cluster - # Add provider label. - - target_label: provider - replacement: kvm - # Add installation label. - - target_label: installation - replacement: test-installation - # Add priority label. - - target_label: service_priority - replacement: highest - # Add organization label. - - target_label: organization - replacement: my-organization - # Add customer label. - - target_label: customer - replacement: pmo - # Add role label. - - source_labels: [__meta_kubernetes_node_label_role] - target_label: role - metric_relabel_configs: - # drop unused rest client metrics - - source_labels: [__name__] - regex: rest_client_(rate_limiter_duration_seconds_bucket|request_size_bytes_bucket|response_size_bytes_bucket) - action: drop - # drop uid label from kubelet - - action: labeldrop - regex: uid -# Add scrape configuration for cadvisor -- job_name: eks-sample-prometheus/cadvisor-eks-sample/0 - honor_labels: true - scheme: https - kubernetes_sd_configs: - - role: node - api_server: https://master.eks-sample:443 - tls_config: - ca_file: /etc/prometheus/secrets/cluster-certificates/ca - cert_file: /etc/prometheus/secrets/cluster-certificates/crt - key_file: /etc/prometheus/secrets/cluster-certificates/key - insecure_skip_verify: false - tls_config: - ca_file: /etc/prometheus/secrets/cluster-certificates/ca - cert_file: /etc/prometheus/secrets/cluster-certificates/crt - key_file: /etc/prometheus/secrets/cluster-certificates/key - insecure_skip_verify: false - relabel_configs: - - source_labels: [__address__] - target_label: instance - - target_label: __address__ - replacement: master.eks-sample:443 - - source_labels: [__meta_kubernetes_node_name] - target_label: __metrics_path__ - replacement: /api/v1/nodes/${1}:10250/proxy/metrics/cadvisor - - target_label: app - replacement: cadvisor - # Add node name. - - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname] - target_label: node - # Add cluster_id label. - - target_label: cluster_id - replacement: eks-sample - # Add cluster_type label. - - target_label: cluster_type - replacement: workload_cluster - # Add provider label. - - target_label: provider - replacement: kvm - # Add installation label. - - target_label: installation - replacement: test-installation - # Add priority label. - - target_label: service_priority - replacement: highest - # Add organization label. - - target_label: organization - replacement: my-organization - # Add customer label. - - target_label: customer - replacement: pmo - # Add role label. - - source_labels: [__meta_kubernetes_node_label_role] - target_label: role - metric_relabel_configs: - # drop id and name labels from cAdvisor as they do not provide value but use a lot of RAM - - action: labeldrop - regex: id|name - # dropping explained here https://github.com/giantswarm/giantswarm/issues/26361 - - source_labels: [__name__] - regex: container_(blkio_device_usage_total|network_transmit_errors_total|network_receive_errors_total|tasks_state|memory_failures_total|memory_max_usage_bytes|cpu_load_average_10s|memory_failcnt|cpu_system_seconds_total) - action: drop - - source_labels: [namespace] - regex: (kube-system|giantswarm.*|kong.*|kyverno) - action: keep # calico-node - job_name: eks-sample-prometheus/calico-node-eks-sample/0 honor_labels: true @@ -297,64 +123,6 @@ # Add customer label. - target_label: customer replacement: pmo -# Add etcd configuration -- job_name: eks-sample-prometheus/etcd-eks-sample/0 - honor_labels: true - scheme: https - kubernetes_sd_configs: - - role: node - api_server: https://master.eks-sample:443 - tls_config: - ca_file: /etc/prometheus/secrets/cluster-certificates/ca - cert_file: /etc/prometheus/secrets/cluster-certificates/crt - key_file: /etc/prometheus/secrets/cluster-certificates/key - insecure_skip_verify: false - tls_config: - ca_file: /etc/prometheus/secrets/cluster-certificates/ca - cert_file: /etc/prometheus/secrets/cluster-certificates/crt - key_file: /etc/prometheus/secrets/cluster-certificates/key - insecure_skip_verify: true - relabel_configs: - - source_labels: [__meta_kubernetes_node_label_role] - regex: control-plane|master - action: keep - # by default use node address - - source_labels: [__address__] - regex: (.*):10250 - target_label: __address__ - replacement: ${1}:2379 - action: replace - - target_label: app - replacement: etcd - - source_labels: [__address__] - target_label: instance - # Add ip label. - - target_label: ip - source_labels: [__meta_kubernetes_node_address_InternalIP] - # Add cluster_id label. - - target_label: cluster_id - replacement: eks-sample - # Add cluster_type label. - - target_label: cluster_type - replacement: workload_cluster - # Add provider label. - - target_label: provider - replacement: kvm - # Add installation label. - - target_label: installation - replacement: test-installation - # Add priority label. - - target_label: service_priority - replacement: highest - # Add organization label. - - target_label: organization - replacement: my-organization - # Add customer label. - - target_label: customer - replacement: pmo - # Add role label. - - source_labels: [__meta_kubernetes_node_label_role] - target_label: role # kube-proxy - job_name: eks-sample-prometheus/kube-proxy-eks-sample/0 honor_labels: true @@ -431,84 +199,6 @@ - source_labels: [__name__] regex: rest_client_(rate_limiter_duration_seconds_bucket|request_size_bytes_bucket|response_size_bytes_bucket) action: drop -# coredns -- job_name: eks-sample-prometheus/coredns-eks-sample/0 - honor_labels: true - scheme: https - kubernetes_sd_configs: - - role: endpoints - namespaces: - names: - - kube-system - api_server: https://master.eks-sample:443 - tls_config: - ca_file: /etc/prometheus/secrets/cluster-certificates/ca - cert_file: /etc/prometheus/secrets/cluster-certificates/crt - key_file: /etc/prometheus/secrets/cluster-certificates/key - insecure_skip_verify: false - tls_config: - ca_file: /etc/prometheus/secrets/cluster-certificates/ca - cert_file: /etc/prometheus/secrets/cluster-certificates/crt - key_file: /etc/prometheus/secrets/cluster-certificates/key - insecure_skip_verify: true - relabel_configs: - - source_labels: [__address__] - target_label: instance - - source_labels: [__meta_kubernetes_pod_container_name] - regex: coredns - action: keep - - target_label: __address__ - replacement: master.eks-sample:443 - - source_labels: [__meta_kubernetes_pod_name] - regex: (coredns.*) - target_label: __metrics_path__ - replacement: /api/v1/namespaces/kube-system/pods/${1}:9153/proxy/metrics - - source_labels: [__meta_kubernetes_pod_container_name] - target_label: app - - source_labels: [__meta_kubernetes_service_annotationpresent_giantswarm_io_monitoring, __meta_kubernetes_service_labelpresent_giantswarm_io_monitoring] - regex: .*(true).* - action: drop - # Add namespace label. - - source_labels: [__meta_kubernetes_namespace] - target_label: namespace - # Add pod label. - - source_labels: [__meta_kubernetes_pod_name] - target_label: pod - # Add container label. - - source_labels: [__meta_kubernetes_pod_container_name] - target_label: container - # Add node label. - - source_labels: [__meta_kubernetes_pod_node_name] - target_label: node - # Add role label. - - source_labels: [__meta_kubernetes_node_label_role] - target_label: role - # Add cluster_id label. - - target_label: cluster_id - replacement: eks-sample - # Add cluster_type label. - - target_label: cluster_type - replacement: workload_cluster - # Add provider label. - - target_label: provider - replacement: kvm - # Add installation label. - - target_label: installation - replacement: test-installation - # Add priority label. - - target_label: service_priority - replacement: highest - # Add organization label. - - target_label: organization - replacement: my-organization - # Add customer label. - - target_label: customer - replacement: pmo - metric_relabel_configs: - # drop unused coredns metrics with the highest cardinality as they increase Prometheus memory usage - - source_labels: [__name__] - regex: coredns_dns_(response_size_bytes_bucket|request_size_bytes_bucket) - action: drop # cert-exporter - job_name: eks-sample-prometheus/cert-exporter-eks-sample/0 honor_labels: true @@ -582,66 +272,6 @@ # Add customer label. - target_label: customer replacement: pmo -# node-exporter -- job_name: eks-sample-prometheus/node-exporter-eks-sample/0 - honor_labels: true - scheme: https - kubernetes_sd_configs: - - role: pod - api_server: https://master.eks-sample:443 - tls_config: - ca_file: /etc/prometheus/secrets/cluster-certificates/ca - cert_file: /etc/prometheus/secrets/cluster-certificates/crt - key_file: /etc/prometheus/secrets/cluster-certificates/key - insecure_skip_verify: false - tls_config: - ca_file: /etc/prometheus/secrets/cluster-certificates/ca - cert_file: /etc/prometheus/secrets/cluster-certificates/crt - key_file: /etc/prometheus/secrets/cluster-certificates/key - insecure_skip_verify: true - relabel_configs: - - target_label: __address__ - replacement: master.eks-sample:443 - - source_labels: [__meta_kubernetes_pod_name] - regex: (node-exporter.*) - target_label: __metrics_path__ - replacement: /api/v1/namespaces/kube-system/pods/${1}:10300/proxy/metrics - - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_pod_name] - regex: kube-system;node-exporter.* - action: keep - - source_labels: [__meta_kubernetes_pod_container_name] - target_label: app - - source_labels: [__meta_kubernetes_pod_annotationpresent_giantswarm_io_monitoring, __meta_kubernetes_pod_labelpresent_giantswarm_io_monitoring] - regex: .*(true).* - action: drop - - source_labels: [__meta_kubernetes_pod_node_name] - target_label: node - # Add cluster_id label. - - target_label: cluster_id - replacement: eks-sample - # Add cluster_type label. - - target_label: cluster_type - replacement: workload_cluster - # Add provider label. - - target_label: provider - replacement: kvm - # Add installation label. - - target_label: installation - replacement: test-installation - # Add priority label. - - target_label: service_priority - replacement: highest - # Add organization label. - - target_label: organization - replacement: my-organization - # Add customer label. - - target_label: customer - replacement: pmo - metric_relabel_configs: - # drop unused metrics with the highest cardinality as they increase Prometheus memory usage - - source_labels: [__name__] - regex: node_(filesystem_files|filesystem_readonly|nfs_requests_total|network_carrier|network_transmit_colls_total|network_carrier_changes_total|network_transmit_packets_total|network_carrier_down_changes_total|network_carrier_up_changes_total|network_iface_id|xfs_.+|ethtool_.+) - action: drop - job_name: eks-sample-prometheus/workload-eks-sample/0 honor_labels: true scheme: https @@ -737,6 +367,15 @@ - target_label: customer replacement: pmo metric_relabel_configs: + - source_labels: [container] + regex: prometheus-operator-app + action: drop + - source_labels: [app] + regex: coredns + action: drop + - source_labels: [app] + regex: kube-state-metrics + action: drop # drop unused nginx metrics with the highest cardinality as they increase Prometheus memory usage - source_labels: [__name__] regex: nginx_ingress_controller_(bytes_sent_bucket|request_size_bucket|response_duration_seconds_bucket|response_size_bucket|request_duration_seconds_count|connect_duration_seconds_bucket|header_duration_seconds_bucket|bytes_sent_count|request_duration_seconds_sum|bytes_sent_sum|request_size_count|response_size_count|response_duration_seconds_sum|response_duration_seconds_count|ingress_upstream_latency_seconds|ingress_upstream_latency_seconds_sum|ingress_upstream_latency_seconds_count) diff --git a/service/controller/resource/monitoring/scrapeconfigs/test/openstack/case-6-cluster-api-eks.golden b/service/controller/resource/monitoring/scrapeconfigs/test/openstack/case-6-cluster-api-eks.golden index 1689fae4f..60b981f67 100644 --- a/service/controller/resource/monitoring/scrapeconfigs/test/openstack/case-6-cluster-api-eks.golden +++ b/service/controller/resource/monitoring/scrapeconfigs/test/openstack/case-6-cluster-api-eks.golden @@ -1,178 +1,4 @@ -- job_name: eks-sample-prometheus/kubernetes-apiserver-eks-sample/0 - honor_labels: true - scheme: https - kubernetes_sd_configs: - - role: endpoints - api_server: https://master.eks-sample:443 - tls_config: - ca_file: /etc/prometheus/secrets/cluster-certificates/ca - cert_file: /etc/prometheus/secrets/cluster-certificates/crt - key_file: /etc/prometheus/secrets/cluster-certificates/key - insecure_skip_verify: false - tls_config: - ca_file: /etc/prometheus/secrets/cluster-certificates/ca - cert_file: /etc/prometheus/secrets/cluster-certificates/crt - key_file: /etc/prometheus/secrets/cluster-certificates/key - insecure_skip_verify: true - relabel_configs: - - source_labels: [__meta_kubernetes_service_label_component] - regex: apiserver - action: keep - - source_labels: [__meta_kubernetes_endpoint_port_name] - regex: https - action: keep - - target_label: app - replacement: kubernetes - # Add cluster_id label. - - target_label: cluster_id - replacement: eks-sample - # Add cluster_type label. - - target_label: cluster_type - replacement: workload_cluster - # Add provider label. - - target_label: provider - replacement: openstack - # Add installation label. - - target_label: installation - replacement: test-installation - # Add priority label. - - target_label: service_priority - replacement: highest - # Add organization label. - - target_label: organization - replacement: my-organization - # Add customer label. - - target_label: customer - replacement: pmo - # Add role label. - - source_labels: [__meta_kubernetes_node_label_role] - target_label: role -# Add kubelet configuration -- job_name: eks-sample-prometheus/kubelet-eks-sample/0 - honor_labels: true - scheme: https - kubernetes_sd_configs: - - role: node - api_server: https://master.eks-sample:443 - tls_config: - ca_file: /etc/prometheus/secrets/cluster-certificates/ca - cert_file: /etc/prometheus/secrets/cluster-certificates/crt - key_file: /etc/prometheus/secrets/cluster-certificates/key - insecure_skip_verify: false - tls_config: - ca_file: /etc/prometheus/secrets/cluster-certificates/ca - cert_file: /etc/prometheus/secrets/cluster-certificates/crt - key_file: /etc/prometheus/secrets/cluster-certificates/key - insecure_skip_verify: true - relabel_configs: - - target_label: app - replacement: kubelet - - source_labels: [__address__] - target_label: instance - - target_label: __address__ - replacement: master.eks-sample:443 - - source_labels: [__meta_kubernetes_node_name] - target_label: __metrics_path__ - replacement: /api/v1/nodes/${1}:10250/proxy/metrics - - source_labels: [__meta_kubernetes_node_name] - target_label: node - # Add cluster_id label. - - target_label: cluster_id - replacement: eks-sample - # Add cluster_type label. - - target_label: cluster_type - replacement: workload_cluster - # Add provider label. - - target_label: provider - replacement: openstack - # Add installation label. - - target_label: installation - replacement: test-installation - # Add priority label. - - target_label: service_priority - replacement: highest - # Add organization label. - - target_label: organization - replacement: my-organization - # Add customer label. - - target_label: customer - replacement: pmo - # Add role label. - - source_labels: [__meta_kubernetes_node_label_role] - target_label: role - metric_relabel_configs: - # drop unused rest client metrics - - source_labels: [__name__] - regex: rest_client_(rate_limiter_duration_seconds_bucket|request_size_bytes_bucket|response_size_bytes_bucket) - action: drop - # drop uid label from kubelet - - action: labeldrop - regex: uid -# Add scrape configuration for cadvisor -- job_name: eks-sample-prometheus/cadvisor-eks-sample/0 - honor_labels: true - scheme: https - kubernetes_sd_configs: - - role: node - api_server: https://master.eks-sample:443 - tls_config: - ca_file: /etc/prometheus/secrets/cluster-certificates/ca - cert_file: /etc/prometheus/secrets/cluster-certificates/crt - key_file: /etc/prometheus/secrets/cluster-certificates/key - insecure_skip_verify: false - tls_config: - ca_file: /etc/prometheus/secrets/cluster-certificates/ca - cert_file: /etc/prometheus/secrets/cluster-certificates/crt - key_file: /etc/prometheus/secrets/cluster-certificates/key - insecure_skip_verify: false - relabel_configs: - - source_labels: [__address__] - target_label: instance - - target_label: __address__ - replacement: master.eks-sample:443 - - source_labels: [__meta_kubernetes_node_name] - target_label: __metrics_path__ - replacement: /api/v1/nodes/${1}:10250/proxy/metrics/cadvisor - - target_label: app - replacement: cadvisor - # Add node name. - - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname] - target_label: node - # Add cluster_id label. - - target_label: cluster_id - replacement: eks-sample - # Add cluster_type label. - - target_label: cluster_type - replacement: workload_cluster - # Add provider label. - - target_label: provider - replacement: openstack - # Add installation label. - - target_label: installation - replacement: test-installation - # Add priority label. - - target_label: service_priority - replacement: highest - # Add organization label. - - target_label: organization - replacement: my-organization - # Add customer label. - - target_label: customer - replacement: pmo - # Add role label. - - source_labels: [__meta_kubernetes_node_label_role] - target_label: role - metric_relabel_configs: - # drop id and name labels from cAdvisor as they do not provide value but use a lot of RAM - - action: labeldrop - regex: id|name - # dropping explained here https://github.com/giantswarm/giantswarm/issues/26361 - - source_labels: [__name__] - regex: container_(blkio_device_usage_total|network_transmit_errors_total|network_receive_errors_total|tasks_state|memory_failures_total|memory_max_usage_bytes|cpu_load_average_10s|memory_failcnt|cpu_system_seconds_total) - action: drop - - source_labels: [namespace] - regex: (kube-system|giantswarm.*|kong.*|kyverno) - action: keep + # calico-node - job_name: eks-sample-prometheus/calico-node-eks-sample/0 honor_labels: true @@ -241,239 +67,6 @@ # Add customer label. - target_label: customer replacement: pmo -# Add etcd configuration -- job_name: eks-sample-prometheus/etcd-eks-sample/0 - honor_labels: true - scheme: https - kubernetes_sd_configs: - - role: pod - namespaces: - names: - - kube-system - api_server: https://master.eks-sample:443 - tls_config: - ca_file: /etc/prometheus/secrets/cluster-certificates/ca - cert_file: /etc/prometheus/secrets/cluster-certificates/crt - key_file: /etc/prometheus/secrets/cluster-certificates/key - insecure_skip_verify: false - tls_config: - ca_file: /etc/prometheus/secrets/cluster-certificates/ca - cert_file: /etc/prometheus/secrets/cluster-certificates/crt - key_file: /etc/prometheus/secrets/cluster-certificates/key - insecure_skip_verify: true - relabel_configs: - - source_labels: [__meta_kubernetes_pod_container_name] - regex: (etcd) - action: keep - - target_label: __address__ - replacement: master.eks-sample:443 - - source_labels: [__meta_kubernetes_pod_name] - target_label: __metrics_path__ - replacement: /api/v1/namespaces/kube-system/pods/${1}:2381/proxy/metrics - action: replace - - source_labels: [ __meta_kubernetes_pod_name ] - target_label: pod_name - - target_label: app - replacement: etcd - - source_labels: [__address__] - target_label: instance - # Add ip label. - - target_label: ip - source_labels: [__meta_kubernetes_node_address_InternalIP] - # Add cluster_id label. - - target_label: cluster_id - replacement: eks-sample - # Add cluster_type label. - - target_label: cluster_type - replacement: workload_cluster - # Add provider label. - - target_label: provider - replacement: openstack - # Add installation label. - - target_label: installation - replacement: test-installation - # Add priority label. - - target_label: service_priority - replacement: highest - # Add organization label. - - target_label: organization - replacement: my-organization - # Add customer label. - - target_label: customer - replacement: pmo - # Add role label. - - source_labels: [__meta_kubernetes_node_label_role] - target_label: role -# kube-controller-manager -- job_name: eks-sample-prometheus/kubernetes-controller-manager-eks-sample/0 - honor_labels: true - scheme: https - kubernetes_sd_configs: - - role: pod - namespaces: - names: - - kube-system - api_server: https://master.eks-sample:443 - tls_config: - ca_file: /etc/prometheus/secrets/cluster-certificates/ca - cert_file: /etc/prometheus/secrets/cluster-certificates/crt - key_file: /etc/prometheus/secrets/cluster-certificates/key - insecure_skip_verify: false - tls_config: - ca_file: /etc/prometheus/secrets/cluster-certificates/ca - cert_file: /etc/prometheus/secrets/cluster-certificates/crt - key_file: /etc/prometheus/secrets/cluster-certificates/key - insecure_skip_verify: true - relabel_configs: - - source_labels: [__address__] - replacement: 10257 - target_label: __tmp_port - - source_labels: [__meta_kubernetes_pod_annotationpresent_giantswarm_io_monitoring_port,__meta_kubernetes_pod_annotation_giantswarm_io_monitoring_port] - action: replace - regex: true;(\d+) - replacement: $1 - target_label: __tmp_port - - source_labels: [__address__, __tmp_port] - target_label: instance - regex: (.+);(.+) - replacement: $1:$2 - - source_labels: [__meta_kubernetes_pod_container_name] - regex: (k8s-controller-manager|kube-controller-manager) - action: keep - - target_label: __address__ - replacement: master.eks-sample:443 - - source_labels: [__meta_kubernetes_pod_name, __tmp_port] - target_label: __metrics_path__ - regex: (.+);(\d+) - replacement: /api/v1/namespaces/kube-system/pods/https:${1}:${2}/proxy/metrics - - target_label: app - replacement: kube-controller-manager - # Add namespace label. - - source_labels: [__meta_kubernetes_namespace] - target_label: namespace - # Add pod label. - - source_labels: [__meta_kubernetes_pod_name] - target_label: pod - # Add container label. - - source_labels: [__meta_kubernetes_pod_container_name] - target_label: container - # Add node label. - - source_labels: [__meta_kubernetes_pod_node_name] - target_label: node - # Add role label. - - source_labels: [__meta_kubernetes_node_label_role] - target_label: role - # Add cluster_id label. - - target_label: cluster_id - replacement: eks-sample - # Add cluster_type label. - - target_label: cluster_type - replacement: workload_cluster - # Add provider label. - - target_label: provider - replacement: openstack - # Add installation label. - - target_label: installation - replacement: test-installation - # Add priority label. - - target_label: service_priority - replacement: highest - # Add organization label. - - target_label: organization - replacement: my-organization - # Add customer label. - - target_label: customer - replacement: pmo - metric_relabel_configs: - # drop unused rest client metrics - - source_labels: [__name__] - regex: rest_client_(rate_limiter_duration_seconds_bucket|request_size_bytes_bucket|response_size_bytes_bucket) - action: drop -# kube-scheduler -- job_name: eks-sample-prometheus/kubernetes-scheduler-eks-sample/0 - honor_labels: true - scheme: https - kubernetes_sd_configs: - - role: pod - namespaces: - names: - - kube-system - api_server: https://master.eks-sample:443 - tls_config: - ca_file: /etc/prometheus/secrets/cluster-certificates/ca - cert_file: /etc/prometheus/secrets/cluster-certificates/crt - key_file: /etc/prometheus/secrets/cluster-certificates/key - insecure_skip_verify: false - tls_config: - ca_file: /etc/prometheus/secrets/cluster-certificates/ca - cert_file: /etc/prometheus/secrets/cluster-certificates/crt - key_file: /etc/prometheus/secrets/cluster-certificates/key - insecure_skip_verify: true - relabel_configs: - - source_labels: [__address__] - replacement: 10259 - target_label: __tmp_port - - source_labels: [__meta_kubernetes_pod_annotationpresent_giantswarm_io_monitoring_port,__meta_kubernetes_pod_annotation_giantswarm_io_monitoring_port] - action: replace - regex: true;(\d+) - replacement: $1 - target_label: __tmp_port - - source_labels: [__address__, __tmp_port] - target_label: instance - regex: (.+);(.+) - replacement: $1:$2 - - source_labels: [__meta_kubernetes_pod_container_name] - regex: (k8s-scheduler|kube-scheduler) - action: keep - - target_label: __address__ - replacement: master.eks-sample:443 - - source_labels: [__meta_kubernetes_pod_name, __tmp_port] - target_label: __metrics_path__ - regex: (.+);(\d+) - replacement: /api/v1/namespaces/kube-system/pods/https:${1}:${2}/proxy/metrics - - target_label: app - replacement: kube-scheduler - # Add namespace label. - - source_labels: [__meta_kubernetes_namespace] - target_label: namespace - # Add pod label. - - source_labels: [__meta_kubernetes_pod_name] - target_label: pod - # Add container label. - - source_labels: [__meta_kubernetes_pod_container_name] - target_label: container - # Add node label. - - source_labels: [__meta_kubernetes_pod_node_name] - target_label: node - # Add role label. - - source_labels: [__meta_kubernetes_node_label_role] - target_label: role - # Add cluster_id label. - - target_label: cluster_id - replacement: eks-sample - # Add cluster_type label. - - target_label: cluster_type - replacement: workload_cluster - # Add provider label. - - target_label: provider - replacement: openstack - # Add installation label. - - target_label: installation - replacement: test-installation - # Add priority label. - - target_label: service_priority - replacement: highest - # Add organization label. - - target_label: organization - replacement: my-organization - # Add customer label. - - target_label: customer - replacement: pmo - metric_relabel_configs: - # drop unused rest client metrics - - source_labels: [__name__] - regex: rest_client_(rate_limiter_duration_seconds_bucket|request_size_bytes_bucket|response_size_bytes_bucket) - action: drop # kube-proxy - job_name: eks-sample-prometheus/kube-proxy-eks-sample/0 honor_labels: true @@ -550,84 +143,6 @@ - source_labels: [__name__] regex: rest_client_(rate_limiter_duration_seconds_bucket|request_size_bytes_bucket|response_size_bytes_bucket) action: drop -# coredns -- job_name: eks-sample-prometheus/coredns-eks-sample/0 - honor_labels: true - scheme: https - kubernetes_sd_configs: - - role: endpoints - namespaces: - names: - - kube-system - api_server: https://master.eks-sample:443 - tls_config: - ca_file: /etc/prometheus/secrets/cluster-certificates/ca - cert_file: /etc/prometheus/secrets/cluster-certificates/crt - key_file: /etc/prometheus/secrets/cluster-certificates/key - insecure_skip_verify: false - tls_config: - ca_file: /etc/prometheus/secrets/cluster-certificates/ca - cert_file: /etc/prometheus/secrets/cluster-certificates/crt - key_file: /etc/prometheus/secrets/cluster-certificates/key - insecure_skip_verify: true - relabel_configs: - - source_labels: [__address__] - target_label: instance - - source_labels: [__meta_kubernetes_pod_container_name] - regex: coredns - action: keep - - target_label: __address__ - replacement: master.eks-sample:443 - - source_labels: [__meta_kubernetes_pod_name] - regex: (coredns.*) - target_label: __metrics_path__ - replacement: /api/v1/namespaces/kube-system/pods/${1}:9153/proxy/metrics - - source_labels: [__meta_kubernetes_pod_container_name] - target_label: app - - source_labels: [__meta_kubernetes_service_annotationpresent_giantswarm_io_monitoring, __meta_kubernetes_service_labelpresent_giantswarm_io_monitoring] - regex: .*(true).* - action: drop - # Add namespace label. - - source_labels: [__meta_kubernetes_namespace] - target_label: namespace - # Add pod label. - - source_labels: [__meta_kubernetes_pod_name] - target_label: pod - # Add container label. - - source_labels: [__meta_kubernetes_pod_container_name] - target_label: container - # Add node label. - - source_labels: [__meta_kubernetes_pod_node_name] - target_label: node - # Add role label. - - source_labels: [__meta_kubernetes_node_label_role] - target_label: role - # Add cluster_id label. - - target_label: cluster_id - replacement: eks-sample - # Add cluster_type label. - - target_label: cluster_type - replacement: workload_cluster - # Add provider label. - - target_label: provider - replacement: openstack - # Add installation label. - - target_label: installation - replacement: test-installation - # Add priority label. - - target_label: service_priority - replacement: highest - # Add organization label. - - target_label: organization - replacement: my-organization - # Add customer label. - - target_label: customer - replacement: pmo - metric_relabel_configs: - # drop unused coredns metrics with the highest cardinality as they increase Prometheus memory usage - - source_labels: [__name__] - regex: coredns_dns_(response_size_bytes_bucket|request_size_bytes_bucket) - action: drop # cert-exporter - job_name: eks-sample-prometheus/cert-exporter-eks-sample/0 honor_labels: true @@ -701,66 +216,6 @@ # Add customer label. - target_label: customer replacement: pmo -# node-exporter -- job_name: eks-sample-prometheus/node-exporter-eks-sample/0 - honor_labels: true - scheme: https - kubernetes_sd_configs: - - role: pod - api_server: https://master.eks-sample:443 - tls_config: - ca_file: /etc/prometheus/secrets/cluster-certificates/ca - cert_file: /etc/prometheus/secrets/cluster-certificates/crt - key_file: /etc/prometheus/secrets/cluster-certificates/key - insecure_skip_verify: false - tls_config: - ca_file: /etc/prometheus/secrets/cluster-certificates/ca - cert_file: /etc/prometheus/secrets/cluster-certificates/crt - key_file: /etc/prometheus/secrets/cluster-certificates/key - insecure_skip_verify: true - relabel_configs: - - target_label: __address__ - replacement: master.eks-sample:443 - - source_labels: [__meta_kubernetes_pod_name] - regex: (node-exporter.*) - target_label: __metrics_path__ - replacement: /api/v1/namespaces/kube-system/pods/${1}:10300/proxy/metrics - - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_pod_name] - regex: kube-system;node-exporter.* - action: keep - - source_labels: [__meta_kubernetes_pod_container_name] - target_label: app - - source_labels: [__meta_kubernetes_pod_annotationpresent_giantswarm_io_monitoring, __meta_kubernetes_pod_labelpresent_giantswarm_io_monitoring] - regex: .*(true).* - action: drop - - source_labels: [__meta_kubernetes_pod_node_name] - target_label: node - # Add cluster_id label. - - target_label: cluster_id - replacement: eks-sample - # Add cluster_type label. - - target_label: cluster_type - replacement: workload_cluster - # Add provider label. - - target_label: provider - replacement: openstack - # Add installation label. - - target_label: installation - replacement: test-installation - # Add priority label. - - target_label: service_priority - replacement: highest - # Add organization label. - - target_label: organization - replacement: my-organization - # Add customer label. - - target_label: customer - replacement: pmo - metric_relabel_configs: - # drop unused metrics with the highest cardinality as they increase Prometheus memory usage - - source_labels: [__name__] - regex: node_(filesystem_files|filesystem_readonly|nfs_requests_total|network_carrier|network_transmit_colls_total|network_carrier_changes_total|network_transmit_packets_total|network_carrier_down_changes_total|network_carrier_up_changes_total|network_iface_id|xfs_.+|ethtool_.+) - action: drop - job_name: eks-sample-prometheus/workload-eks-sample/0 honor_labels: true scheme: https @@ -856,6 +311,15 @@ - target_label: customer replacement: pmo metric_relabel_configs: + - source_labels: [container] + regex: prometheus-operator-app + action: drop + - source_labels: [app] + regex: coredns + action: drop + - source_labels: [app] + regex: kube-state-metrics + action: drop # drop unused nginx metrics with the highest cardinality as they increase Prometheus memory usage - source_labels: [__name__] regex: nginx_ingress_controller_(bytes_sent_bucket|request_size_bucket|response_duration_seconds_bucket|response_size_bucket|request_duration_seconds_count|connect_duration_seconds_bucket|header_duration_seconds_bucket|bytes_sent_count|request_duration_seconds_sum|bytes_sent_sum|request_size_count|response_size_count|response_duration_seconds_sum|response_duration_seconds_count|ingress_upstream_latency_seconds|ingress_upstream_latency_seconds_sum|ingress_upstream_latency_seconds_count) diff --git a/service/key/key.go b/service/key/key.go index c984dec41..79442c849 100644 --- a/service/key/key.go +++ b/service/key/key.go @@ -297,7 +297,7 @@ func IsManagementCluster(installation string, obj interface{}) bool { } func IsEKSCluster(obj interface{}) bool { - if c, ok := obj.(capi.Cluster); ok { + if c, ok := obj.(*capi.Cluster); ok { return c.Spec.InfrastructureRef.Kind == "AWSManagedCluster" } return false