diff --git a/CHANGELOG.md b/CHANGELOG.md index 3a3314892..7fdec7780 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed +- Adapt scrape targets to EKS clusters. - computation of number of shards: rely on max number of series over the last 6h. ### Fixed diff --git a/pkg/unittest/input/case-5-cluster-api-v1alpha3.golden b/pkg/unittest/input/case-5-cluster-api-v1alpha3.golden index 13217690f..31a56ba5c 100644 --- a/pkg/unittest/input/case-5-cluster-api-v1alpha3.golden +++ b/pkg/unittest/input/case-5-cluster-api-v1alpha3.golden @@ -9,3 +9,5 @@ spec: controlPlaneEndpoint: host: master.baz port: 443 + infrastructureRef: + kind: AWSCluster diff --git a/pkg/unittest/input/case-6-cluster-api-eks.golden b/pkg/unittest/input/case-6-cluster-api-eks.golden new file mode 100644 index 000000000..d70bba102 --- /dev/null +++ b/pkg/unittest/input/case-6-cluster-api-eks.golden @@ -0,0 +1,13 @@ +apiVersion: cluster.x-k8s.io/v1beta1 +kind: Cluster +metadata: + labels: + "release.giantswarm.io/version": 18.0.0 + name: eks-sample + namespace: org-my-organization +spec: + controlPlaneEndpoint: + host: master.eks-sample + port: 443 + infrastructureRef: + kind: AWSManagedCluster diff --git a/service/controller/resource/alerting/alertmanagerconfig/test/alertmanager-config/case-6-cluster-api-eks.golden b/service/controller/resource/alerting/alertmanagerconfig/test/alertmanager-config/case-6-cluster-api-eks.golden new file mode 100644 index 000000000..000b738d0 --- /dev/null +++ b/service/controller/resource/alerting/alertmanagerconfig/test/alertmanager-config/case-6-cluster-api-eks.golden @@ -0,0 +1,488 @@ +global: + resolve_timeout: 5m + slack_api_url: https://slack + +templates: +- '/etc/alertmanager/config/*.tmpl' + +route: + group_by: [alertname, cluster_id, installation, status] + group_interval: 15m + group_wait: 30s + repeat_interval: 4h + receiver: root + + routes: + + # Falco noise Slack + - receiver: falco_noise_slack + matchers: + - alertname=~"Falco.*" + continue: false + + - receiver: team_tinkerers_slack + matchers: + - severity=~"page|notify" + - team="tinkerers" + continue: false + + # Team Ops Opsgenie + - receiver: opsgenie_router + matchers: + - severity="page" + continue: true + + # Service Level slack -- chooses the slack channel based on the provider + - receiver: team_phoenix_slack + matchers: + - alertname="ServiceLevelBurnRateTooHigh" + continue: false + + # Team Atlas Slack + - receiver: team_atlas_slack + matchers: + - severity=~"page|notify" + - team="atlas" + - type!="heartbeat" + - alertname!~"Inhibition.*" + continue: false + + # Team Celestial Slack + - receiver: team_phoenix_slack + matchers: + - severity=~"page|notify" + - team="celestial" + - sloth_severity=~"page|ticket" + continue: false + + # Team Firecracker Slack + - receiver: team_phoenix_slack + matchers: + - severity=~"page|notify" + - team="firecracker" + - sloth_severity=~"page|ticket" + continue: false + + # Team Phoenix Slack + - receiver: team_phoenix_slack + matchers: + - severity=~"page|notify" + - team="phoenix" + - sloth_severity=~"page|ticket" + continue: false + + # Team Shield Slack + - receiver: team_shield_slack + matchers: + - severity=~"page|notify" + - team="shield" + continue: false + + # Team BigMac Slack + - receiver: team_bigmac_slack + matchers: + - severity=~"page|notify" + - team="bigmac" + continue: false + + # Team Clippy Slack + # ReRoute to `phoenix` until we change all team ownership labels + - receiver: team_phoenix_slack + matchers: + - severity=~"page|notify" + - team="clippy" + continue: false + + # Team Rocket Slack + - receiver: team_rocket_slack + matchers: + - severity=~"page|notify" + - team="rocket" + continue: false + + # Team Ops Slack + - receiver: team_ops_slack + matchers: + - severity=~"page|notify" + continue: true + + # Team Turtles Slack + - receiver: team_turtles_slack + matchers: + - severity=~"page|notify" + - team="turtles" + continue: false + +receivers: +- name: root + +- name: falco_noise_slack + slack_configs: + - channel: '#noise-falco' + send_resolved: true + actions: + - type: button + text: ':green_book: OpsRecipe' + url: 'https://intranet.giantswarm.io/docs/support-and-ops/ops-recipes/{{ (index .Alerts 0).Annotations.opsrecipe }}' + style: '{{ if eq .Status "firing" }}primary{{ else }}default{{ end }}' + - type: button + text: ':coffin: Linked PMs' + url: '{{ template "__alert_linked_postmortems" . }}' + - type: button + text: ':mag: Query' + url: '{{ (index .Alerts 0).GeneratorURL }}' + - type: button + text: ':grafana: Dashboard' + url: 'https://grafana/d/{{ (index .Alerts 0).Annotations.dashboard }}' + - type: button + text: ':no_bell: Silence' + url: '{{ template "__alert_silence_link" .}}' + style: '{{ if eq .Status "firing" }}danger{{ else }}default{{ end }}' + +- name: team_atlas_slack + slack_configs: + - channel: '#alert-atlas-test' + send_resolved: true + actions: + - type: button + text: ':green_book: OpsRecipe' + url: 'https://intranet.giantswarm.io/docs/support-and-ops/ops-recipes/{{ (index .Alerts 0).Annotations.opsrecipe }}' + style: '{{ if eq .Status "firing" }}primary{{ else }}default{{ end }}' + - type: button + text: ':coffin: Linked PMs' + url: '{{ template "__alert_linked_postmortems" . }}' + - type: button + text: ':mag: Query' + url: '{{ (index .Alerts 0).GeneratorURL }}' + - type: button + text: ':grafana: Dashboard' + url: 'https://grafana/d/{{ (index .Alerts 0).Annotations.dashboard }}' + - type: button + text: ':no_bell: Silence' + url: '{{ template "__alert_silence_link" .}}' + style: '{{ if eq .Status "firing" }}danger{{ else }}default{{ end }}' + +- name: team_phoenix_slack + slack_configs: + - channel: '#alert-phoenix-test' + send_resolved: true + actions: + - type: button + text: ':green_book: OpsRecipe' + url: 'https://intranet.giantswarm.io/docs/support-and-ops/ops-recipes/{{ (index .Alerts 0).Annotations.opsrecipe }}' + style: '{{ if eq .Status "firing" }}primary{{ else }}default{{ end }}' + - type: button + text: ':coffin: Linked PMs' + url: '{{ template "__alert_linked_postmortems" . }}' + - type: button + text: ':mag: Query' + url: '{{ (index .Alerts 0).GeneratorURL }}' + - type: button + text: ':grafana: Dashboard' + url: 'https://grafana/d/{{ (index .Alerts 0).Annotations.dashboard }}' + - type: button + text: ':no_bell: Silence' + url: '{{ template "__alert_silence_link" . }}' + style: '{{ if eq .Status "firing" }}danger{{ else }}default{{ end }}' + +- name: team_bigmac_slack + slack_configs: + - channel: '#alert-bigmac-test' + send_resolved: true + actions: + - type: button + text: ':green_book: OpsRecipe' + url: 'https://intranet.giantswarm.io/docs/support-and-ops/ops-recipes/{{ (index .Alerts 0).Annotations.opsrecipe }}' + style: '{{ if eq .Status "firing" }}primary{{ else }}default{{ end }}' + - type: button + text: ':coffin: Linked PMs' + url: '{{ template "__alert_linked_postmortems" . }}' + - type: button + text: ':mag: Query' + url: '{{ (index .Alerts 0).GeneratorURL }}' + - type: button + text: ':grafana: Dashboard' + url: 'https://grafana/d/{{ (index .Alerts 0).Annotations.dashboard }}' + - type: button + text: ':no_bell: Silence' + url: '{{ template "__alert_silence_link" . }}' + style: '{{ if eq .Status "firing" }}danger{{ else }}default{{ end }}' + +- name: team_rocket_slack + slack_configs: + - channel: '#alert-rocket-test' + send_resolved: true + actions: + - type: button + text: ':green_book: OpsRecipe' + url: 'https://intranet.giantswarm.io/docs/support-and-ops/ops-recipes/{{ (index .Alerts 0).Annotations.opsrecipe }}' + style: '{{ if eq .Status "firing" }}primary{{ else }}default{{ end }}' + - type: button + text: ':coffin: Linked PMs' + url: '{{ template "__alert_linked_postmortems" . }}' + - type: button + text: ':mag: Query' + url: '{{ (index .Alerts 0).GeneratorURL }}' + - type: button + text: ':grafana: Dashboard' + url: 'https://grafana/d/{{ (index .Alerts 0).Annotations.dashboard }}' + - type: button + text: ':no_bell: Silence' + url: '{{ template "__alert_silence_link" . }}' + style: '{{ if eq .Status "firing" }}danger{{ else }}default{{ end }}' + +- name: team_shield_slack + slack_configs: + - channel: '#alert-shield' + send_resolved: true + actions: + - type: button + text: ':green_book: OpsRecipe' + url: 'https://intranet.giantswarm.io/docs/support-and-ops/ops-recipes/{{ (index .Alerts 0).Annotations.opsrecipe }}' + style: '{{ if eq .Status "firing" }}primary{{ else }}default{{ end }}' + - type: button + text: ':coffin: Linked PMs' + url: '{{ template "__alert_linked_postmortems" . }}' + - type: button + text: ':mag: Query' + url: '{{ (index .Alerts 0).GeneratorURL }}' + - type: button + text: ':grafana: Dashboard' + url: 'https://grafana/d/{{ (index .Alerts 0).Annotations.dashboard }}' + - type: button + text: ':no_bell: Silence' + url: '{{ template "__alert_silence_link" .}}' + style: '{{ if eq .Status "firing" }}danger{{ else }}default{{ end }}' + +- name: team_turtles_slack + slack_configs: + - channel: '#alert-turtles-test' + send_resolved: true + actions: + - type: button + text: ':green_book: OpsRecipe' + url: 'https://intranet.giantswarm.io/docs/support-and-ops/ops-recipes/{{ (index .Alerts 0).Annotations.opsrecipe }}' + style: '{{ if eq .Status "firing" }}primary{{ else }}default{{ end }}' + - type: button + text: ':coffin: Linked PMs' + url: '{{ template "__alert_linked_postmortems" . }}' + - type: button + text: ':mag: Query' + url: '{{ (index .Alerts 0).GeneratorURL }}' + - type: button + text: ':grafana: Dashboard' + url: 'https://grafana/d/{{ (index .Alerts 0).Annotations.dashboard }}' + - type: button + text: ':no_bell: Silence' + url: '{{ template "__alert_silence_link" . }}' + style: '{{ if eq .Status "firing" }}danger{{ else }}default{{ end }}' + +- name: team_tinkerers_slack + slack_configs: + - channel: '#alert-tinkerers' + send_resolved: true + actions: + - type: button + text: ':green_book: OpsRecipe' + url: 'https://intranet.giantswarm.io/docs/support-and-ops/ops-recipes/{{ (index .Alerts 0).Annotations.opsrecipe }}' + style: '{{ if eq .Status "firing" }}primary{{ else }}default{{ end }}' + - type: button + text: ':coffin: Linked PMs' + url: '{{ template "__alert_linked_postmortems" . }}' + - type: button + text: ':mag: Query' + url: '{{ (index .Alerts 0).GeneratorURL }}' + - type: button + text: ':grafana: Dashboard' + url: 'https://grafana/d/{{ (index .Alerts 0).Annotations.dashboard }}' + - type: button + text: ':no_bell: Silence' + url: '{{ template "__alert_silence_link" .}}' + style: '{{ if eq .Status "firing" }}danger{{ else }}default{{ end }}' + +- name: opsgenie_router + opsgenie_configs: + - api_key: opsgenie-key + tags: "{{ (index .Alerts 0).Labels.alertname }},{{ (index .Alerts 0).Labels.cluster_type }},{{ (index .Alerts 0).Labels.severity }},{{ (index .Alerts 0).Labels.team }},{{ (index .Alerts 0).Labels.area }},{{ (index .Alerts 0).Labels.service_priority }},aws,test-installation,testing" + +- name: team_ops_slack + slack_configs: + - channel: '#alert-test-test-installation' + send_resolved: true + actions: + - type: button + text: ':green_book: OpsRecipe' + url: 'https://intranet.giantswarm.io/docs/support-and-ops/ops-recipes/{{ (index .Alerts 0).Annotations.opsrecipe }}' + style: '{{ if eq .Status "firing" }}primary{{ else }}default{{ end }}' + - type: button + text: ':coffin: Linked PMs' + url: '{{ template "__alert_linked_postmortems" . }}' + - type: button + text: ':mag: Query' + url: '{{ (index .Alerts 0).GeneratorURL }}' + - type: button + text: ':grafana: Dashboard' + url: 'https://grafana/d/{{ (index .Alerts 0).Annotations.dashboard }}' + - type: button + text: ':no_bell: Silence' + url: '{{ template "__alert_silence_link" . }}' + style: '{{ if eq .Status "firing" }}danger{{ else }}default{{ end }}' + +inhibit_rules: +- source_matchers: + - inhibit_kube_state_metrics_down=true + target_matchers: + - cancel_if_kube_state_metrics_down=true + equal: [cluster_id] + +- source_matchers: + - inhibit_kube_state_metrics_down=true + - cluster_id=test-installation + target_matchers: + - cancel_if_mc_kube_state_metrics_down=true + +- source_matchers: + - inhibit_kube_state_metrics_down=true + target_matchers: + - cancel_if_any_kube_state_metrics_down=true + +- source_matchers: + - cluster_status_creating=true + target_matchers: + - cancel_if_cluster_status_creating=true + equal: [cluster_id] + +- source_matchers: + - cluster_status_created=true + target_matchers: + - cancel_if_cluster_status_created=true + equal: [cluster_id] + +- source_matchers: + - cluster_status_updating=true + target_matchers: + - cancel_if_cluster_status_updating=true + equal: [cluster_id] + +- source_matchers: + - cluster_status_updated=true + target_matchers: + - cancel_if_cluster_status_updated=true + equal: [cluster_id] + +- source_matchers: + - cluster_status_deleting=true + target_matchers: + - cancel_if_cluster_status_deleting=true + equal: [cluster_id] + +- source_matchers: + - cluster_with_no_nodepools=true + target_matchers: + - cancel_if_cluster_with_no_nodepools=true + equal: [cluster_id] + +- source_matchers: + - cluster_with_scaling_nodepools=true + target_matchers: + - cancel_if_cluster_with_scaling_nodepools=true + equal: [cluster_id] + +- source_matchers: + - cluster_with_notready_nodepools=true + target_matchers: + - cancel_if_cluster_with_notready_nodepools=true + equal: [cluster_id] + +- source_matchers: + - instance_state_not_running=true + target_matchers: + - cancel_if_instance_state_not_running=true + equal: [node] + +- source_matchers: + - kiam_has_errors=true + target_matchers: + - cancel_if_kiam_has_errors=true + equal: [cluster_id] + +- source_matchers: + - kubelet_down=true + target_matchers: + - cancel_if_kubelet_down=true + equal: [cluster_id, ip] + +- source_matchers: + - kubelet_down=true + target_matchers: + - cancel_if_any_kubelet_down=true + equal: [cluster_id] + +- source_matchers: + - kubelet_not_ready=true + target_matchers: + - cancel_if_kubelet_not_ready=true + equal: [cluster_id, ip] + +- source_matchers: + - kubelet_not_ready=true + target_matchers: + - cancel_if_any_kubelet_not_ready=true + equal: [cluster_id] + +- source_matchers: + - nodes_down=true + target_matchers: + - cancel_if_nodes_down=true + equal: [cluster_id] + +- source_matchers: + - scrape_timeout=true + target_matchers: + - cancel_if_scrape_timeout=true + equal: [cluster_id, instance] + +- source_matchers: + - control_plane_node_down=true + target_matchers: + - cancel_if_control_plane_node_down=true + equal: [cluster_id] + +- source_matchers: + - apiserver_down=true + target_matchers: + - cancel_if_apiserver_down=true + equal: [cluster_id] + +- source_matchers: + - apiserver_down=true + target_matchers: + - cancel_if_any_apiserver_down=true + +- source_matchers: + - outside_working_hours=true + target_matchers: + - cancel_if_outside_working_hours=true + +- source_matchers: + - has_worker_nodes=false + target_matchers: + - cancel_if_cluster_has_no_workers=true + equal: [cluster_id] + +- source_matchers: + - cluster_is_not_running_prometheus_agent=true + target_matchers: + - cancel_if_cluster_is_not_running_prometheus_agent=true + equal: [cluster_id] + +- source_matchers: + - inhibit_prometheus_agent_down=true + target_matchers: + - cancel_if_prometheus_agent_down=true + equal: [cluster_id] + +- source_matchers: + - stack_failed=true + target_matchers: + - cancel_if_stack_failed=true diff --git a/service/controller/resource/alerting/alertmanagerconfig/test/notification-template/case-6-cluster-api-eks.golden b/service/controller/resource/alerting/alertmanagerconfig/test/notification-template/case-6-cluster-api-eks.golden new file mode 100644 index 000000000..7e8e54c13 --- /dev/null +++ b/service/controller/resource/alerting/alertmanagerconfig/test/notification-template/case-6-cluster-api-eks.golden @@ -0,0 +1,65 @@ +{{ define "__alertmanager" }}Alertmanager{{ end }} +{{ define "__alertmanagerurl" }}{{ .ExternalURL }}/#/alerts?receiver={{ .Receiver }}&silenced=false&inhibited=false&active=true&filter=%7Balertname%3D%22{{ .CommonLabels.alertname }}%22%7D{{ end }} +{{ define "__dashboardurl" -}}{{ if hasPrefix "https://" (index .Alerts 0).Annotations.dashboard }}{{ (index .Alerts 0).Annotations.dashboard }}{{ else }}https://grafana/d/{{ (index .Alerts 0).Annotations.dashboard }}{{ end }}{{- end }} +{{ define "__runbookurl" -}}https://intranet.giantswarm.io/docs/support-and-ops/ops-recipes/{{ (index .Alerts 0).Annotations.opsrecipe }}{{- end }} + +{{ define "slack.default.title" }}{{ .Status | toUpper }}[{{ if eq .Status "firing" }}{{ .Alerts.Firing | len }}{{- else }}{{ .Alerts.Resolved | len }}{{- end }}] {{ (index .Alerts 0).Labels.alertname }} - Team {{ (index .Alerts 0).Labels.team }}{{ end }} +{{ define "slack.default.username" }}{{ template "__alertmanager" . }}{{ end }} +{{ define "slack.default.fallback" }}{{ template "slack.default.title" . }} | {{ template "slack.default.titlelink" . }}{{ end }} +{{ define "slack.default.pretext" }}{{ end }} +{{ define "slack.default.titlelink" }}{{ template "__alertmanagerurl" . }}{{ end }} +{{ define "slack.default.iconemoji" }}{{ end }} +{{ define "slack.default.iconurl" }}{{ end }} +{{ define "slack.default.text" }}*Cluster:* {{ (index .Alerts 0).Labels.installation }}{{ if (index .Alerts 0).Labels.cluster_id }} / {{ (index .Alerts 0).Labels.cluster_id }}{{ end }}{{ if (index .Alerts 0).Labels.service }} / {{ (index .Alerts 0).Labels.service }}{{ end }} +*Area:* {{ (index .Alerts 0).Labels.area }} / {{ (index .Alerts 0).Labels.topic }} +{{- if (index .Alerts 0).Annotations.description }} +*Instances* +{{ if eq .Status "firing" }} +{{ range .Alerts.Firing }} +:fire: {{ if .Labels.instance }}{{ .Labels.instance }}: {{ end }}{{ .Annotations.description }}{{- end }} +{{ else }} +{{ range .Alerts.Resolved }} +:success: {{ if .Labels.instance }}{{ .Labels.instance }}: {{ end }}{{ .Annotations.description }}{{- end }} +{{ end }} +{{- end }} +{{ end }} + + +{{ define "opsgenie.default.message" }}{{ .GroupLabels.installation }} / {{ .GroupLabels.cluster_id }}{{ if (index .Alerts 0).Labels.service }} / {{ (index .Alerts 0).Labels.service }}{{ end }} - {{ index (index .Alerts.Firing 0).Labels `alertname`}}{{ end }} +{{ define "opsgenie.default.source" }}{{ template "__alertmanager" . }}{{ end }} +{{ define "opsgenie.default.description" }}* Team: {{ (index .Alerts 0).Labels.team }} +* Area: {{ (index .Alerts 0).Labels.area }} / {{ (index .Alerts 0).Labels.topic }} + +* Instances:{{ range .Alerts.Firing }} +🔥 {{ if .Labels.instance }}{{ .Labels.instance }}: {{ end }}{{ .Annotations.description }}{{ end }} + +--- + +{{ if (index .Alerts 0).Annotations.opsrecipe }}📗 Runbook: {{ template "__runbookurl" . }}{{- end }} +🔔 Alertmanager {{ template "__alertmanagerurl" . }} +{{- if (index .Alerts 0).Annotations.dashboard }}📈 Dashboard: {{ template "__dashboardurl" . }}{{- end }} +👀 Prometheus: {{ (index .Alerts 0).GeneratorURL }} + +--- + +{{ if not (index .Alerts 0).Annotations.opsrecipe }}⚠️ There is no **runbook** for this alert, time to get your pen.{{- end }} +{{ if not (index .Alerts 0).Annotations.dashboard }}⚠️ There is no **dashboard** for this alert, time to sketch.{{- end }} +{{- end }} + +# This builds the silence URL. We exclude the alertname in the range +# to avoid the issue of having trailing comma separator (%2C) at the end +# of the generated URL +{{ define "__alert_silence_link" -}} + {{ .ExternalURL }}/#/silences/new?filter=%7B + {{- range .CommonLabels.SortedPairs -}} + {{- if ne .Name "alertname" -}} + {{- .Name }}%3D"{{- .Value -}}"%2C%20 + {{- end -}} + {{- end -}} + alertname%3D"{{ .CommonLabels.alertname }}"%7D +{{- end }} + +# Link to related PMs +{{ define "__alert_linked_postmortems" -}} +https://github.com/giantswarm/giantswarm/issues?q=is%3Aissue+is%3Aopen+label%3Apostmortem+label%3Aalert%2F{{ .CommonLabels.alertname }} +{{- end }} diff --git a/service/controller/resource/alerting/alertmanagerwiring/test/case-6-cluster-api-eks.golden b/service/controller/resource/alerting/alertmanagerwiring/test/case-6-cluster-api-eks.golden new file mode 100644 index 000000000..423d6dbad --- /dev/null +++ b/service/controller/resource/alerting/alertmanagerwiring/test/case-6-cluster-api-eks.golden @@ -0,0 +1,6 @@ +- static_configs: + - targets: + - alertmanager-operated.monitoring.svc:9093 + scheme: http + timeout: 10s + api_version: v2 diff --git a/service/controller/resource/alerting/heartbeatwebhookconfig/test/case-6-cluster-api-eks.golden b/service/controller/resource/alerting/heartbeatwebhookconfig/test/case-6-cluster-api-eks.golden new file mode 100644 index 000000000..ada1003c2 --- /dev/null +++ b/service/controller/resource/alerting/heartbeatwebhookconfig/test/case-6-cluster-api-eks.golden @@ -0,0 +1,32 @@ +metadata: + creationTimestamp: null + labels: + app.kubernetes.io/instance: alertmanager + app.kubernetes.io/managed-by: prometheus-meta-operator + app.kubernetes.io/name: alertmanager + name: eks-sample + namespace: monitoring +spec: + receivers: + - name: heartbeat_test-installation_eks-sample + webhookConfigs: + - httpConfig: + authorization: + credentials: + key: opsGenieApiKey + name: alertmanager-global + type: GenieKey + sendResolved: false + url: https://api.opsgenie.com/v2/heartbeats/test-installation-eks-sample/ping + route: + groupInterval: 30s + groupWait: 30s + matchers: + - name: cluster_id + value: eks-sample + - name: installation + value: test-installation + - name: type + value: heartbeat + receiver: heartbeat_test-installation_eks-sample + repeatInterval: 15m diff --git a/service/controller/resource/monitoring/ingress/test/case-6-cluster-api-eks.golden b/service/controller/resource/monitoring/ingress/test/case-6-cluster-api-eks.golden new file mode 100644 index 000000000..8b7e19125 --- /dev/null +++ b/service/controller/resource/monitoring/ingress/test/case-6-cluster-api-eks.golden @@ -0,0 +1,29 @@ +apiVersion: v1 +kind: Ingress +metadata: + annotations: + nginx.ingress.kubernetes.io/auth-signin: https://$host/oauth2/start?rd=$escaped_request_uri + nginx.ingress.kubernetes.io/auth-url: https://$host/oauth2/auth + creationTimestamp: null + labels: + app.kubernetes.io/instance: eks-sample + app.kubernetes.io/managed-by: prometheus-meta-operator + app.kubernetes.io/name: prometheus + giantswarm.io/cluster: eks-sample + name: prometheus-eks-sample + namespace: eks-sample-prometheus +spec: + ingressClassName: nginx + rules: + - host: prometheus + http: + paths: + - backend: + service: + name: prometheus-operated + port: + number: 9090 + path: /eks-sample + pathType: ImplementationSpecific +status: + loadBalancer: {} diff --git a/service/controller/resource/monitoring/ingress/test/externaldns-with-restricted-access/case-6-cluster-api-eks.golden b/service/controller/resource/monitoring/ingress/test/externaldns-with-restricted-access/case-6-cluster-api-eks.golden new file mode 100644 index 000000000..4f2c8fbaa --- /dev/null +++ b/service/controller/resource/monitoring/ingress/test/externaldns-with-restricted-access/case-6-cluster-api-eks.golden @@ -0,0 +1,32 @@ +apiVersion: v1 +kind: Ingress +metadata: + annotations: + external-dns.alpha.kubernetes.io/hostname: prometheus.3lkdj.test.gigantic.io + giantswarm.io/external-dns: managed + nginx.ingress.kubernetes.io/auth-signin: https://$host/oauth2/start?rd=$escaped_request_uri + nginx.ingress.kubernetes.io/auth-url: https://$host/oauth2/auth + nginx.ingress.kubernetes.io/whitelist-source-range: 21.10.178/24 + creationTimestamp: null + labels: + app.kubernetes.io/instance: eks-sample + app.kubernetes.io/managed-by: prometheus-meta-operator + app.kubernetes.io/name: prometheus + giantswarm.io/cluster: eks-sample + name: prometheus-eks-sample + namespace: eks-sample-prometheus +spec: + ingressClassName: nginx + rules: + - host: prometheus.3lkdj.test.gigantic.io + http: + paths: + - backend: + service: + name: prometheus-operated + port: + number: 9090 + path: /eks-sample + pathType: ImplementationSpecific +status: + loadBalancer: {} diff --git a/service/controller/resource/monitoring/ingress/test/externaldns/case-6-cluster-api-eks.golden b/service/controller/resource/monitoring/ingress/test/externaldns/case-6-cluster-api-eks.golden new file mode 100644 index 000000000..0e6a5794a --- /dev/null +++ b/service/controller/resource/monitoring/ingress/test/externaldns/case-6-cluster-api-eks.golden @@ -0,0 +1,31 @@ +apiVersion: v1 +kind: Ingress +metadata: + annotations: + external-dns.alpha.kubernetes.io/hostname: prometheus + giantswarm.io/external-dns: managed + nginx.ingress.kubernetes.io/auth-signin: https://$host/oauth2/start?rd=$escaped_request_uri + nginx.ingress.kubernetes.io/auth-url: https://$host/oauth2/auth + creationTimestamp: null + labels: + app.kubernetes.io/instance: eks-sample + app.kubernetes.io/managed-by: prometheus-meta-operator + app.kubernetes.io/name: prometheus + giantswarm.io/cluster: eks-sample + name: prometheus-eks-sample + namespace: eks-sample-prometheus +spec: + ingressClassName: nginx + rules: + - host: prometheus + http: + paths: + - backend: + service: + name: prometheus-operated + port: + number: 9090 + path: /eks-sample + pathType: ImplementationSpecific +status: + loadBalancer: {} diff --git a/service/controller/resource/monitoring/ingress/test/restricted-access/case-6-cluster-api-eks.golden b/service/controller/resource/monitoring/ingress/test/restricted-access/case-6-cluster-api-eks.golden new file mode 100644 index 000000000..45edc7a80 --- /dev/null +++ b/service/controller/resource/monitoring/ingress/test/restricted-access/case-6-cluster-api-eks.golden @@ -0,0 +1,30 @@ +apiVersion: v1 +kind: Ingress +metadata: + annotations: + nginx.ingress.kubernetes.io/auth-signin: https://$host/oauth2/start?rd=$escaped_request_uri + nginx.ingress.kubernetes.io/auth-url: https://$host/oauth2/auth + nginx.ingress.kubernetes.io/whitelist-source-range: 21.10.178/24 + creationTimestamp: null + labels: + app.kubernetes.io/instance: eks-sample + app.kubernetes.io/managed-by: prometheus-meta-operator + app.kubernetes.io/name: prometheus + giantswarm.io/cluster: eks-sample + name: prometheus-eks-sample + namespace: eks-sample-prometheus +spec: + ingressClassName: nginx + rules: + - host: prometheus + http: + paths: + - backend: + service: + name: prometheus-operated + port: + number: 9090 + path: /eks-sample + pathType: ImplementationSpecific +status: + loadBalancer: {} diff --git a/service/controller/resource/monitoring/prometheus/test/case-6-cluster-api-eks.golden b/service/controller/resource/monitoring/prometheus/test/case-6-cluster-api-eks.golden new file mode 100644 index 000000000..0781d34ec --- /dev/null +++ b/service/controller/resource/monitoring/prometheus/test/case-6-cluster-api-eks.golden @@ -0,0 +1,122 @@ +metadata: + creationTimestamp: null + labels: + app.kubernetes.io/instance: eks-sample + app.kubernetes.io/managed-by: prometheus-meta-operator + app.kubernetes.io/name: prometheus + giantswarm.io/cluster: eks-sample + name: eks-sample + namespace: eks-sample-prometheus +spec: + additionalAlertManagerConfigs: + key: alertmanager-additional.yaml + name: alertmanager-config + additionalScrapeConfigs: + key: prometheus-additional.yaml + name: additional-scrape-configs + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: node-role.kubernetes.io/control-plane + operator: DoesNotExist + apiserverConfig: + bearerTokenFile: /etc/prometheus/secrets/cluster-certificates/token + host: https://master.eks-sample:443 + tlsConfig: + ca: {} + caFile: /etc/prometheus/secrets/cluster-certificates/ca + cert: {} + arbitraryFSAccessThroughSMs: {} + enableFeatures: + - remote-write-receiver + evaluationInterval: 60s + externalLabels: + cluster_id: eks-sample + cluster_type: workload_cluster + customer: Giant Swarm + installation: test-installation + pipeline: testing + provider: provider + region: onprem + externalUrl: http://prometheus/eks-sample + image: quay.io/giantswarm/prometheus:v2.28.1 + logLevel: debug + podMetadata: + labels: + app.kubernetes.io/instance: eks-sample + app.kubernetes.io/managed-by: prometheus-meta-operator + app.kubernetes.io/name: prometheus + giantswarm.io/cluster: eks-sample + giantswarm.io/monitoring: "true" + priorityClassName: prometheus + replicas: 1 + resources: + limits: + cpu: 150m + memory: "1073741824" + requests: + cpu: 100m + memory: "1073741824" + retention: 2w + retentionSize: 85GiB + routePrefix: /eks-sample + ruleNamespaceSelector: + matchExpressions: + - key: kubernetes.io/metadata.name + operator: Exists + ruleSelector: + matchExpressions: + - key: cluster_type + operator: NotIn + values: + - management_cluster + - key: application.giantswarm.io/team + operator: Exists + rules: + alert: {} + scrapeInterval: 60s + secrets: + - cluster-certificates + securityContext: + fsGroup: 2000 + runAsGroup: 65534 + runAsNonRoot: true + runAsUser: 1000 + serviceMonitorNamespaceSelector: + matchExpressions: + - key: nonexistentkey + operator: Exists + serviceMonitorSelector: + matchExpressions: + - key: nonexistentkey + operator: Exists + storage: + volumeClaimTemplate: + metadata: {} + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 100Gi + status: {} + topologySpreadConstraints: + - labelSelector: + matchLabels: + app.kubernetes.io/name: prometheus + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + tsdb: {} + version: v2.28.1 + walCompression: true + web: + pageTitle: test-installation/eks-sample Prometheus +status: + availableReplicas: 0 + paused: false + replicas: 0 + unavailableReplicas: 0 + updatedReplicas: 0 diff --git a/service/controller/resource/monitoring/remotewriteingress/test/case-6-cluster-api-eks.golden b/service/controller/resource/monitoring/remotewriteingress/test/case-6-cluster-api-eks.golden new file mode 100644 index 000000000..8c483e42c --- /dev/null +++ b/service/controller/resource/monitoring/remotewriteingress/test/case-6-cluster-api-eks.golden @@ -0,0 +1,32 @@ +apiVersion: v1 +kind: Ingress +metadata: + annotations: + nginx.ingress.kubernetes.io/auth-realm: Authentication Required + nginx.ingress.kubernetes.io/auth-secret: remote-write-ingress-auth + nginx.ingress.kubernetes.io/auth-type: basic + nginx.ingress.kubernetes.io/client-body-buffer-size: 50m + nginx.ingress.kubernetes.io/proxy-body-size: 50m + creationTimestamp: null + labels: + app.kubernetes.io/instance: eks-sample + app.kubernetes.io/managed-by: prometheus-meta-operator + app.kubernetes.io/name: prometheus + giantswarm.io/cluster: eks-sample + name: prometheus-eks-sample-remote-write + namespace: eks-sample-prometheus +spec: + ingressClassName: nginx + rules: + - host: prometheus + http: + paths: + - backend: + service: + name: prometheus-operated + port: + number: 9090 + path: /eks-sample/api/v1/write + pathType: ImplementationSpecific +status: + loadBalancer: {} diff --git a/service/controller/resource/monitoring/remotewriteingress/test/externaldns/case-6-cluster-api-eks.golden b/service/controller/resource/monitoring/remotewriteingress/test/externaldns/case-6-cluster-api-eks.golden new file mode 100644 index 000000000..73e671d42 --- /dev/null +++ b/service/controller/resource/monitoring/remotewriteingress/test/externaldns/case-6-cluster-api-eks.golden @@ -0,0 +1,34 @@ +apiVersion: v1 +kind: Ingress +metadata: + annotations: + external-dns.alpha.kubernetes.io/hostname: prometheus + giantswarm.io/external-dns: managed + nginx.ingress.kubernetes.io/auth-realm: Authentication Required + nginx.ingress.kubernetes.io/auth-secret: remote-write-ingress-auth + nginx.ingress.kubernetes.io/auth-type: basic + nginx.ingress.kubernetes.io/client-body-buffer-size: 50m + nginx.ingress.kubernetes.io/proxy-body-size: 50m + creationTimestamp: null + labels: + app.kubernetes.io/instance: eks-sample + app.kubernetes.io/managed-by: prometheus-meta-operator + app.kubernetes.io/name: prometheus + giantswarm.io/cluster: eks-sample + name: prometheus-eks-sample-remote-write + namespace: eks-sample-prometheus +spec: + ingressClassName: nginx + rules: + - host: prometheus + http: + paths: + - backend: + service: + name: prometheus-operated + port: + number: 9090 + path: /eks-sample/api/v1/write + pathType: ImplementationSpecific +status: + loadBalancer: {} diff --git a/service/controller/resource/monitoring/scrapeconfigs/resource.go b/service/controller/resource/monitoring/scrapeconfigs/resource.go index cdf40c554..55435daa0 100644 --- a/service/controller/resource/monitoring/scrapeconfigs/resource.go +++ b/service/controller/resource/monitoring/scrapeconfigs/resource.go @@ -262,38 +262,53 @@ func getObservabilityBundleAppVersion(ctx context.Context, ctrlClient client.Cli func listTargetsToIgnore(ctx context.Context, ctrlClient client.Client, cluster metav1.Object, config Config) ([]string, error) { ignoredTargets := make([]string, 0) - appVersion, err := getObservabilityBundleAppVersion(ctx, ctrlClient, cluster, config) - if err != nil { - return nil, microerror.Mask(err) - } + if key.IsEKSCluster(cluster) { + // In case of EKS clusters, we assume scraping targets via ServiceMonitors, + // so we ignore them from the Prometheus scrape config + config.Logger.Debugf(ctx, "EKS cluster: ignoring all scraping targets in Prometheus scrape config") + ignoredTargets = append(ignoredTargets, + "prometheus-operator-app", + "kube-apiserver", + "kube-controller-manager", + "kube-scheduler", + "node-exporter", + "kubelet", + "coredns", + "kube-state-metrics", + "etcd") + } else { + appVersion, err := getObservabilityBundleAppVersion(ctx, ctrlClient, cluster, config) + if err != nil { + return nil, microerror.Mask(err) + } - version, err := semver.Parse(appVersion) - if err != nil { - return nil, microerror.Mask(err) - } + version, err := semver.Parse(appVersion) + if err != nil { + return nil, microerror.Mask(err) + } - initialBundleVersion, err := semver.Parse("0.1.0") - if err != nil { - return nil, microerror.Mask(err) - } + initialBundleVersion, err := semver.Parse("0.1.0") + if err != nil { + return nil, microerror.Mask(err) + } - bundleWithKSMAndExportersVersion, err := semver.Parse("0.4.0") - if err != nil { - return nil, microerror.Mask(err) - } + bundleWithKSMAndExportersVersion, err := semver.Parse("0.4.0") + if err != nil { + return nil, microerror.Mask(err) + } - if version.GTE(initialBundleVersion) { - ignoredTargets = append(ignoredTargets, "prometheus-operator-app", "kube-apiserver", "kube-controller-manager", "kube-scheduler", "node-exporter") - } + if version.GTE(initialBundleVersion) { + ignoredTargets = append(ignoredTargets, "prometheus-operator-app", "kube-apiserver", "kube-controller-manager", "kube-scheduler", "node-exporter") + } - if version.GTE(bundleWithKSMAndExportersVersion) { - ignoredTargets = append(ignoredTargets, "kubelet", "coredns", "kube-state-metrics") + if version.GTE(bundleWithKSMAndExportersVersion) { + ignoredTargets = append(ignoredTargets, "kubelet", "coredns", "kube-state-metrics") - if key.IsCAPIManagementCluster(config.Provider) { - ignoredTargets = append(ignoredTargets, "etcd") + if key.IsCAPIManagementCluster(config.Provider) { + ignoredTargets = append(ignoredTargets, "etcd") + } } } - // Vintage WC if !key.IsCAPIManagementCluster(config.Provider) && !key.IsManagementCluster(config.Installation, cluster) { // Since 18.0.0 we cannot scrape k8s endpoints externally so we ignore those targets. diff --git a/service/controller/resource/monitoring/scrapeconfigs/resource_test.go b/service/controller/resource/monitoring/scrapeconfigs/resource_test.go index f84ce2973..86d73ae63 100644 --- a/service/controller/resource/monitoring/scrapeconfigs/resource_test.go +++ b/service/controller/resource/monitoring/scrapeconfigs/resource_test.go @@ -119,6 +119,7 @@ func TestAWSScrapeconfigs(t *testing.T) { K8sClient: k8sClient, Vault: "vault1.some-installation.test", Installation: "test-installation", + Logger: logger, } return toData(context.Background(), client, v, config) } @@ -220,6 +221,7 @@ func TestAzureScrapeconfigs(t *testing.T) { K8sClient: k8sClient, Vault: "vault1.some-installation.test", Installation: "test-installation", + Logger: logger, } return toData(context.Background(), client, v, config) } @@ -322,6 +324,7 @@ func TestKVMScrapeconfigs(t *testing.T) { K8sClient: k8sClient, Vault: "vault1.some-installation.test", Installation: "test-installation", + Logger: logger, } return toData(context.Background(), client, v, config) } @@ -444,6 +447,7 @@ func TestOpenStackScrapeconfigs(t *testing.T) { K8sClient: k8sClient, Vault: "vault1.some-installation.test", Installation: "test-installation", + Logger: logger, } return toData(context.Background(), client, v, config) } @@ -566,6 +570,7 @@ func TestGCPScrapeconfigs(t *testing.T) { K8sClient: k8sClient, Vault: "vault1.some-installation.test", Installation: "test-installation", + Logger: logger, } return toData(context.Background(), client, v, config) } @@ -688,6 +693,7 @@ func TestCAPAScrapeconfigs(t *testing.T) { K8sClient: k8sClient, Vault: "vault1.some-installation.test", Installation: "test-installation", + Logger: logger, } return toData(context.Background(), client, v, config) } diff --git a/service/controller/resource/monitoring/scrapeconfigs/test/aws/case-6-cluster-api-eks.golden b/service/controller/resource/monitoring/scrapeconfigs/test/aws/case-6-cluster-api-eks.golden new file mode 100644 index 000000000..48f080cf6 --- /dev/null +++ b/service/controller/resource/monitoring/scrapeconfigs/test/aws/case-6-cluster-api-eks.golden @@ -0,0 +1,490 @@ + +# Add scrape configuration for docker +- job_name: eks-sample-prometheus/docker-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: node + api_server: https://master.eks-sample:443 + bearer_token_file: /etc/prometheus/secrets/cluster-certificates/token + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + insecure_skip_verify: false + bearer_token_file: /etc/prometheus/secrets/cluster-certificates/token + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + insecure_skip_verify: false + relabel_configs: + - target_label: __address__ + replacement: master.eks-sample:443 + - source_labels: [__meta_kubernetes_node_name] + target_label: __metrics_path__ + replacement: /api/v1/nodes/${1}:9323/proxy/metrics + - target_label: app + replacement: docker + - source_labels: [__meta_kubernetes_node_address_InternalIP] + replacement: ${1}:9323 + target_label: instance + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: aws + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role + metric_relabel_configs: + - source_labels: [__name__] + regex: (engine_daemon_image_actions_seconds_count|process_virtual_memory_bytes|process_resident_memory_bytes) + action: keep +# calico-node +- job_name: eks-sample-prometheus/calico-node-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: pod + api_server: https://master.eks-sample:443 + bearer_token_file: /etc/prometheus/secrets/cluster-certificates/token + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + insecure_skip_verify: false + bearer_token_file: /etc/prometheus/secrets/cluster-certificates/token + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + insecure_skip_verify: false + relabel_configs: + - source_labels: [__address__] + replacement: ${1}:9091 + target_label: instance + - target_label: __address__ + replacement: master.eks-sample:443 + - source_labels: [__meta_kubernetes_pod_name] + regex: (calico-node.*) + target_label: __metrics_path__ + replacement: /api/v1/namespaces/kube-system/pods/${1}:9091/proxy/metrics + - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_pod_name] + regex: kube-system;calico-node.* + action: keep + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: app + # Add namespace label. + - source_labels: [__meta_kubernetes_namespace] + target_label: namespace + # Add pod label. + - source_labels: [__meta_kubernetes_pod_name] + target_label: pod + # Add container label. + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: container + # Add node label. + - source_labels: [__meta_kubernetes_pod_node_name] + target_label: node + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: aws + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo +# kube-proxy +- job_name: eks-sample-prometheus/kube-proxy-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: pod + namespaces: + names: + - kube-system + api_server: https://master.eks-sample:443 + bearer_token_file: /etc/prometheus/secrets/cluster-certificates/token + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + insecure_skip_verify: false + bearer_token_file: /etc/prometheus/secrets/cluster-certificates/token + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + insecure_skip_verify: true + relabel_configs: + - source_labels: [__address__] + replacement: $1:10249 + target_label: instance + - source_labels: [__meta_kubernetes_pod_name] + regex: (kube-proxy.*) + action: keep + - target_label: __address__ + replacement: master.eks-sample:443 + - source_labels: [__meta_kubernetes_pod_name] + regex: (kube-proxy.*) + target_label: __metrics_path__ + replacement: /api/v1/namespaces/kube-system/pods/${1}:10249/proxy/metrics + - target_label: app + replacement: kube-proxy + # Add namespace label. + - source_labels: [__meta_kubernetes_namespace] + target_label: namespace + # Add pod label. + - source_labels: [__meta_kubernetes_pod_name] + target_label: pod + # Add container label. + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: container + # Add node label. + - source_labels: [__meta_kubernetes_pod_node_name] + target_label: node + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: aws + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo + metric_relabel_configs: + # drop unused rest client metrics + - source_labels: [__name__] + regex: rest_client_(rate_limiter_duration_seconds_bucket|request_size_bytes_bucket|response_size_bytes_bucket) + action: drop +# cert-exporter +- job_name: eks-sample-prometheus/cert-exporter-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: endpoints + namespaces: + names: + - kube-system + api_server: https://master.eks-sample:443 + bearer_token_file: /etc/prometheus/secrets/cluster-certificates/token + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + insecure_skip_verify: false + bearer_token_file: /etc/prometheus/secrets/cluster-certificates/token + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + insecure_skip_verify: true + relabel_configs: + - source_labels: [__address__] + target_label: instance + - source_labels: [__meta_kubernetes_service_label_app] + regex: cert-exporter + action: keep + - target_label: __address__ + replacement: master.eks-sample:443 + - source_labels: [__meta_kubernetes_pod_name] + regex: (cert-exporter.*) + target_label: __metrics_path__ + replacement: /api/v1/namespaces/kube-system/pods/${1}:9005/proxy/metrics + - source_labels: [__meta_kubernetes_service_label_app] + target_label: app + - source_labels: [__meta_kubernetes_service_annotationpresent_giantswarm_io_monitoring, __meta_kubernetes_service_labelpresent_giantswarm_io_monitoring] + regex: .*(true).* + action: drop + # Add namespace label. + - source_labels: [__meta_kubernetes_namespace] + target_label: namespace + # Add pod label. + - source_labels: [__meta_kubernetes_pod_name] + target_label: pod + # Add container label. + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: container + # Add node label. + - source_labels: [__meta_kubernetes_pod_node_name] + target_label: node + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: aws + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo +- job_name: eks-sample-prometheus/workload-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: endpoints + api_server: https://master.eks-sample:443 + bearer_token_file: /etc/prometheus/secrets/cluster-certificates/token + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + insecure_skip_verify: false + bearer_token_file: /etc/prometheus/secrets/cluster-certificates/token + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + insecure_skip_verify: true + relabel_configs: + - source_labels: [__meta_kubernetes_service_annotationpresent_giantswarm_io_monitoring, __meta_kubernetes_service_labelpresent_giantswarm_io_monitoring] + regex: .*(true).* + action: keep + # if __meta_kubernetes_service_annotation_giantswarm_io_monitoring_path is present, we use it as the metrics path + - source_labels: [__meta_kubernetes_service_annotation_giantswarm_io_monitoring_path] + action: replace + target_label: __metrics_path__ + regex: (.+) + # if __meta_kubernetes_service_annotation_giantswarm_io_monitoring_port, we use it as the metrics port + - source_labels: [__address__, __meta_kubernetes_service_annotation_giantswarm_io_monitoring_port] + action: replace + target_label: __address__ + regex: ([^:]+):(\d+);(\d+) + replacement: $1:$3 + # if the protocol is empty, we set it to http by default, this allows to override the protocol for services using https like prometheus operator + - source_labels: [__address__, __meta_kubernetes_service_annotation_giantswarm_io_monitoring_protocol] + action: replace + target_label: __meta_kubernetes_service_annotation_giantswarm_io_monitoring_protocol + regex: (.*); + replacement: "http" + - source_labels: [__meta_kubernetes_pod_ip, __address__] + regex: (.*);([^:]+):(\d+) + replacement: $1:$3 + target_label: instance + - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_annotation_giantswarm_io_monitoring_protocol, __meta_kubernetes_pod_name, __address__, __metrics_path__] + regex: (.*);(.*);(.*);(.+:)(\d+);(.*) + target_label: __metrics_path__ + replacement: /api/v1/namespaces/${1}/pods/${2}:${3}:${5}/proxy${6} + action: replace + - regex: (.*) + target_label: __address__ + replacement: master.eks-sample:443 + action: replace + - source_labels: [__meta_kubernetes_service_name] + regex: (.*) + target_label: app + action: replace + - source_labels: [__meta_kubernetes_service_annotation_giantswarm_io_monitoring_app_label] + regex: (.+) + target_label: app + action: replace + # Add namespace label. + - source_labels: [__meta_kubernetes_namespace] + target_label: namespace + # Add pod label. + - source_labels: [__meta_kubernetes_pod_name] + target_label: pod + # Add container label. + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: container + # Add node label. + - source_labels: [__meta_kubernetes_pod_node_name] + target_label: node + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: aws + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo + metric_relabel_configs: + - source_labels: [container] + regex: prometheus-operator-app + action: drop + - source_labels: [app] + regex: coredns + action: drop + - source_labels: [app] + regex: kube-state-metrics + action: drop + # drop unused nginx metrics with the highest cardinality as they increase Prometheus memory usage + - source_labels: [__name__] + regex: nginx_ingress_controller_(bytes_sent_bucket|request_size_bucket|response_duration_seconds_bucket|response_size_bucket|request_duration_seconds_count|connect_duration_seconds_bucket|header_duration_seconds_bucket|bytes_sent_count|request_duration_seconds_sum|bytes_sent_sum|request_size_count|response_size_count|response_duration_seconds_sum|response_duration_seconds_count|ingress_upstream_latency_seconds|ingress_upstream_latency_seconds_sum|ingress_upstream_latency_seconds_count) + action: drop + # drop unused kong metrics with the highest cardinality as they increase Prometheus memory usage + - source_labels: [__name__] + regex: kong_(upstream_target_health|latency_bucket|latency_count|latency_sum) + action: drop + # drop unused kube-state-metrics metrics with the highest cardinality as they increase Prometheus memory usage + - source_labels: [__name__] + regex: kube_(.+_annotations|secret_type|pod_status_qos_class|pod_tolerations|pod_status_scheduled|replicaset_metadata_generation|replicaset_status_observed_generation|replicaset_annotations|replicaset_status_fully_labeled_replicas|.+_metadata_resource_version) + action: drop + # drop unused promtail/loki metrics + - source_labels: [__name__] + regex: promtail_request_duration_seconds_bucket|loki_request_duration_seconds_bucket + action: drop + # drop unused rest client metrics + - source_labels: [__name__] + regex: rest_client_(rate_limiter_duration_seconds_bucket|request_size_bytes_bucket|response_size_bytes_bucket) + action: drop + # drop image_id label from kube-state-metrics + - source_labels: [app,image_id] + separator: ; + regex: kube-state-metrics;(.+) + replacement: "" + action: replace + target_label: image_id + - source_labels: [app,deployment] + separator: ; + regex: kube-state-metrics;(.+) + target_label: workload_type + replacement: deployment + action: replace + - source_labels: [app,daemonset] + separator: ; + regex: kube-state-metrics;(.+) + target_label: workload_type + replacement: daemonset + action: replace + - source_labels: [app,statefulset] + separator: ; + regex: kube-state-metrics;(.+) + target_label: workload_type + replacement: statefulset + action: replace + - source_labels: [app,deployment] + separator: ; + regex: kube-state-metrics;(.+) + target_label: workload_name + replacement: ${1} + action: replace + - source_labels: [app,daemonset] + separator: ; + regex: kube-state-metrics;(.+) + target_label: workload_name + replacement: ${1} + action: replace + - source_labels: [app,statefulset] + separator: ; + regex: kube-state-metrics;(.+) + target_label: workload_name + replacement: ${1} + action: replace + - source_labels: [app,label_topology_kubernetes_io_region] + separator: ; + regex: kube-state-metrics;(.+) + target_label: region + replacement: ${1} + action: replace + - source_labels: [app,label_topology_kubernetes_io_zone] + separator: ; + regex: kube-state-metrics;(.+) + target_label: zone + replacement: ${1} + action: replace + - action: labeldrop + regex: label_topology_kubernetes_io_region|label_topology_kubernetes_io_zone + # Override with label for AWS clusters if exists. + - source_labels: [app,label_giantswarm_io_machine_deployment] + regex: kube-state-metrics;(.+) + target_label: nodepool + replacement: ${1} + action: replace + # Override with label for Azure clusters if exists. + - source_labels: [app,label_giantswarm_io_machine_pool] + regex: kube-state-metrics;(.+) + target_label: nodepool + replacement: ${1} + action: replace + - action: labeldrop + regex: label_giantswarm_io_machine_pool|label_giantswarm_io_machine_deployment +# prometheus +- job_name: eks-sample-prometheus/prometheus-eks-sample/0 + honor_labels: true + scheme: http + metrics_path: /eks-sample/metrics + static_configs: + - targets: ['localhost:9090'] + relabel_configs: + - replacement: prometheus + target_label: app + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: aws + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo diff --git a/service/controller/resource/monitoring/scrapeconfigs/test/azure/case-6-cluster-api-eks.golden b/service/controller/resource/monitoring/scrapeconfigs/test/azure/case-6-cluster-api-eks.golden new file mode 100644 index 000000000..d573fa28b --- /dev/null +++ b/service/controller/resource/monitoring/scrapeconfigs/test/azure/case-6-cluster-api-eks.golden @@ -0,0 +1,500 @@ + +# Add scrape configuration for docker +- job_name: eks-sample-prometheus/docker-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: node + api_server: https://master.eks-sample:443 + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + relabel_configs: + - target_label: __address__ + replacement: master.eks-sample:443 + - source_labels: [__meta_kubernetes_node_name] + target_label: __metrics_path__ + replacement: /api/v1/nodes/${1}:9323/proxy/metrics + - target_label: app + replacement: docker + - source_labels: [__meta_kubernetes_node_address_InternalIP] + replacement: ${1}:9323 + target_label: instance + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: azure + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role + metric_relabel_configs: + - source_labels: [__name__] + regex: (engine_daemon_image_actions_seconds_count|process_virtual_memory_bytes|process_resident_memory_bytes) + action: keep +# calico-node +- job_name: eks-sample-prometheus/calico-node-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: pod + api_server: https://master.eks-sample:443 + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + relabel_configs: + - source_labels: [__address__] + replacement: ${1}:9091 + target_label: instance + - target_label: __address__ + replacement: master.eks-sample:443 + - source_labels: [__meta_kubernetes_pod_name] + regex: (calico-node.*) + target_label: __metrics_path__ + replacement: /api/v1/namespaces/kube-system/pods/${1}:9091/proxy/metrics + - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_pod_name] + regex: kube-system;calico-node.* + action: keep + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: app + # Add namespace label. + - source_labels: [__meta_kubernetes_namespace] + target_label: namespace + # Add pod label. + - source_labels: [__meta_kubernetes_pod_name] + target_label: pod + # Add container label. + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: container + # Add node label. + - source_labels: [__meta_kubernetes_pod_node_name] + target_label: node + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: azure + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo +# kube-proxy +- job_name: eks-sample-prometheus/kube-proxy-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: pod + namespaces: + names: + - kube-system + api_server: https://master.eks-sample:443 + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: true + relabel_configs: + - source_labels: [__address__] + replacement: $1:10249 + target_label: instance + - source_labels: [__meta_kubernetes_pod_name] + regex: (kube-proxy.*) + action: keep + - target_label: __address__ + replacement: master.eks-sample:443 + - source_labels: [__meta_kubernetes_pod_name] + regex: (kube-proxy.*) + target_label: __metrics_path__ + replacement: /api/v1/namespaces/kube-system/pods/${1}:10249/proxy/metrics + - target_label: app + replacement: kube-proxy + # Add namespace label. + - source_labels: [__meta_kubernetes_namespace] + target_label: namespace + # Add pod label. + - source_labels: [__meta_kubernetes_pod_name] + target_label: pod + # Add container label. + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: container + # Add node label. + - source_labels: [__meta_kubernetes_pod_node_name] + target_label: node + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: azure + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo + metric_relabel_configs: + # drop unused rest client metrics + - source_labels: [__name__] + regex: rest_client_(rate_limiter_duration_seconds_bucket|request_size_bytes_bucket|response_size_bytes_bucket) + action: drop +# cert-exporter +- job_name: eks-sample-prometheus/cert-exporter-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: endpoints + namespaces: + names: + - kube-system + api_server: https://master.eks-sample:443 + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: true + relabel_configs: + - source_labels: [__address__] + target_label: instance + - source_labels: [__meta_kubernetes_service_label_app] + regex: cert-exporter + action: keep + - target_label: __address__ + replacement: master.eks-sample:443 + - source_labels: [__meta_kubernetes_pod_name] + regex: (cert-exporter.*) + target_label: __metrics_path__ + replacement: /api/v1/namespaces/kube-system/pods/${1}:9005/proxy/metrics + - source_labels: [__meta_kubernetes_service_label_app] + target_label: app + - source_labels: [__meta_kubernetes_service_annotationpresent_giantswarm_io_monitoring, __meta_kubernetes_service_labelpresent_giantswarm_io_monitoring] + regex: .*(true).* + action: drop + # Add namespace label. + - source_labels: [__meta_kubernetes_namespace] + target_label: namespace + # Add pod label. + - source_labels: [__meta_kubernetes_pod_name] + target_label: pod + # Add container label. + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: container + # Add node label. + - source_labels: [__meta_kubernetes_pod_node_name] + target_label: node + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: azure + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo +- job_name: eks-sample-prometheus/workload-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: endpoints + api_server: https://master.eks-sample:443 + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: true + relabel_configs: + - source_labels: [__meta_kubernetes_service_annotationpresent_giantswarm_io_monitoring, __meta_kubernetes_service_labelpresent_giantswarm_io_monitoring] + regex: .*(true).* + action: keep + # if __meta_kubernetes_service_annotation_giantswarm_io_monitoring_path is present, we use it as the metrics path + - source_labels: [__meta_kubernetes_service_annotation_giantswarm_io_monitoring_path] + action: replace + target_label: __metrics_path__ + regex: (.+) + # if __meta_kubernetes_service_annotation_giantswarm_io_monitoring_port, we use it as the metrics port + - source_labels: [__address__, __meta_kubernetes_service_annotation_giantswarm_io_monitoring_port] + action: replace + target_label: __address__ + regex: ([^:]+):(\d+);(\d+) + replacement: $1:$3 + # if the protocol is empty, we set it to http by default, this allows to override the protocol for services using https like prometheus operator + - source_labels: [__address__, __meta_kubernetes_service_annotation_giantswarm_io_monitoring_protocol] + action: replace + target_label: __meta_kubernetes_service_annotation_giantswarm_io_monitoring_protocol + regex: (.*); + replacement: "http" + - source_labels: [__meta_kubernetes_pod_ip, __address__] + regex: (.*);([^:]+):(\d+) + replacement: $1:$3 + target_label: instance + - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_annotation_giantswarm_io_monitoring_protocol, __meta_kubernetes_pod_name, __address__, __metrics_path__] + regex: (.*);(.*);(.*);(.+:)(\d+);(.*) + target_label: __metrics_path__ + replacement: /api/v1/namespaces/${1}/pods/${2}:${3}:${5}/proxy${6} + action: replace + - regex: (.*) + target_label: __address__ + replacement: master.eks-sample:443 + action: replace + - source_labels: [__meta_kubernetes_service_name] + regex: (.*) + target_label: app + action: replace + - source_labels: [__meta_kubernetes_service_annotation_giantswarm_io_monitoring_app_label] + regex: (.+) + target_label: app + action: replace + # Add namespace label. + - source_labels: [__meta_kubernetes_namespace] + target_label: namespace + # Add pod label. + - source_labels: [__meta_kubernetes_pod_name] + target_label: pod + # Add container label. + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: container + # Add node label. + - source_labels: [__meta_kubernetes_pod_node_name] + target_label: node + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: azure + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo + metric_relabel_configs: + - source_labels: [container] + regex: prometheus-operator-app + action: drop + - source_labels: [app] + regex: coredns + action: drop + - source_labels: [app] + regex: kube-state-metrics + action: drop + # drop unused nginx metrics with the highest cardinality as they increase Prometheus memory usage + - source_labels: [__name__] + regex: nginx_ingress_controller_(bytes_sent_bucket|request_size_bucket|response_duration_seconds_bucket|response_size_bucket|request_duration_seconds_count|connect_duration_seconds_bucket|header_duration_seconds_bucket|bytes_sent_count|request_duration_seconds_sum|bytes_sent_sum|request_size_count|response_size_count|response_duration_seconds_sum|response_duration_seconds_count|ingress_upstream_latency_seconds|ingress_upstream_latency_seconds_sum|ingress_upstream_latency_seconds_count) + action: drop + # drop unused kong metrics with the highest cardinality as they increase Prometheus memory usage + - source_labels: [__name__] + regex: kong_(upstream_target_health|latency_bucket|latency_count|latency_sum) + action: drop + # drop unused kube-state-metrics metrics with the highest cardinality as they increase Prometheus memory usage + - source_labels: [__name__] + regex: kube_(.+_annotations|secret_type|pod_status_qos_class|pod_tolerations|pod_status_scheduled|replicaset_metadata_generation|replicaset_status_observed_generation|replicaset_annotations|replicaset_status_fully_labeled_replicas|.+_metadata_resource_version) + action: drop + # drop unused promtail/loki metrics + - source_labels: [__name__] + regex: promtail_request_duration_seconds_bucket|loki_request_duration_seconds_bucket + action: drop + # drop unused rest client metrics + - source_labels: [__name__] + regex: rest_client_(rate_limiter_duration_seconds_bucket|request_size_bytes_bucket|response_size_bytes_bucket) + action: drop + # drop image_id label from kube-state-metrics + - source_labels: [app,image_id] + separator: ; + regex: kube-state-metrics;(.+) + replacement: "" + action: replace + target_label: image_id + - source_labels: [app,deployment] + separator: ; + regex: kube-state-metrics;(.+) + target_label: workload_type + replacement: deployment + action: replace + - source_labels: [app,daemonset] + separator: ; + regex: kube-state-metrics;(.+) + target_label: workload_type + replacement: daemonset + action: replace + - source_labels: [app,statefulset] + separator: ; + regex: kube-state-metrics;(.+) + target_label: workload_type + replacement: statefulset + action: replace + - source_labels: [app,deployment] + separator: ; + regex: kube-state-metrics;(.+) + target_label: workload_name + replacement: ${1} + action: replace + - source_labels: [app,daemonset] + separator: ; + regex: kube-state-metrics;(.+) + target_label: workload_name + replacement: ${1} + action: replace + - source_labels: [app,statefulset] + separator: ; + regex: kube-state-metrics;(.+) + target_label: workload_name + replacement: ${1} + action: replace + - source_labels: [app,label_topology_kubernetes_io_region] + separator: ; + regex: kube-state-metrics;(.+) + target_label: region + replacement: ${1} + action: replace + - source_labels: [app,label_topology_kubernetes_io_zone] + separator: ; + regex: kube-state-metrics;(.+) + target_label: zone + replacement: ${1} + action: replace + - action: labeldrop + regex: label_topology_kubernetes_io_region|label_topology_kubernetes_io_zone + # Override with label for AWS clusters if exists. + - source_labels: [app,label_giantswarm_io_machine_deployment] + regex: kube-state-metrics;(.+) + target_label: nodepool + replacement: ${1} + action: replace + # Override with label for Azure clusters if exists. + - source_labels: [app,label_giantswarm_io_machine_pool] + regex: kube-state-metrics;(.+) + target_label: nodepool + replacement: ${1} + action: replace + - action: labeldrop + regex: label_giantswarm_io_machine_pool|label_giantswarm_io_machine_deployment +# prometheus +- job_name: eks-sample-prometheus/prometheus-eks-sample/0 + honor_labels: true + scheme: http + metrics_path: /eks-sample/metrics + static_configs: + - targets: ['localhost:9090'] + relabel_configs: + - replacement: prometheus + target_label: app + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: azure + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo diff --git a/service/controller/resource/monitoring/scrapeconfigs/test/capa/case-6-cluster-api-eks.golden b/service/controller/resource/monitoring/scrapeconfigs/test/capa/case-6-cluster-api-eks.golden new file mode 100644 index 000000000..571aa6899 --- /dev/null +++ b/service/controller/resource/monitoring/scrapeconfigs/test/capa/case-6-cluster-api-eks.golden @@ -0,0 +1,444 @@ + +# calico-node +- job_name: eks-sample-prometheus/calico-node-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: pod + api_server: https://master.eks-sample:443 + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + relabel_configs: + - source_labels: [__address__] + replacement: ${1}:9091 + target_label: instance + - target_label: __address__ + replacement: master.eks-sample:443 + - source_labels: [__meta_kubernetes_pod_name] + regex: (calico-node.*) + target_label: __metrics_path__ + replacement: /api/v1/namespaces/kube-system/pods/${1}:9091/proxy/metrics + - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_pod_name] + regex: kube-system;calico-node.* + action: keep + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: app + # Add namespace label. + - source_labels: [__meta_kubernetes_namespace] + target_label: namespace + # Add pod label. + - source_labels: [__meta_kubernetes_pod_name] + target_label: pod + # Add container label. + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: container + # Add node label. + - source_labels: [__meta_kubernetes_pod_node_name] + target_label: node + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: capa + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo +# kube-proxy +- job_name: eks-sample-prometheus/kube-proxy-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: pod + namespaces: + names: + - kube-system + api_server: https://master.eks-sample:443 + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: true + relabel_configs: + - source_labels: [__address__] + replacement: $1:10249 + target_label: instance + - source_labels: [__meta_kubernetes_pod_name] + regex: (kube-proxy.*) + action: keep + - target_label: __address__ + replacement: master.eks-sample:443 + - source_labels: [__meta_kubernetes_pod_name] + regex: (kube-proxy.*) + target_label: __metrics_path__ + replacement: /api/v1/namespaces/kube-system/pods/${1}:10249/proxy/metrics + - target_label: app + replacement: kube-proxy + # Add namespace label. + - source_labels: [__meta_kubernetes_namespace] + target_label: namespace + # Add pod label. + - source_labels: [__meta_kubernetes_pod_name] + target_label: pod + # Add container label. + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: container + # Add node label. + - source_labels: [__meta_kubernetes_pod_node_name] + target_label: node + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: capa + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo + metric_relabel_configs: + # drop unused rest client metrics + - source_labels: [__name__] + regex: rest_client_(rate_limiter_duration_seconds_bucket|request_size_bytes_bucket|response_size_bytes_bucket) + action: drop +# cert-exporter +- job_name: eks-sample-prometheus/cert-exporter-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: endpoints + namespaces: + names: + - kube-system + api_server: https://master.eks-sample:443 + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: true + relabel_configs: + - source_labels: [__address__] + target_label: instance + - source_labels: [__meta_kubernetes_service_label_app] + regex: cert-exporter + action: keep + - target_label: __address__ + replacement: master.eks-sample:443 + - source_labels: [__meta_kubernetes_pod_name] + regex: (cert-exporter.*) + target_label: __metrics_path__ + replacement: /api/v1/namespaces/kube-system/pods/${1}:9005/proxy/metrics + - source_labels: [__meta_kubernetes_service_label_app] + target_label: app + - source_labels: [__meta_kubernetes_service_annotationpresent_giantswarm_io_monitoring, __meta_kubernetes_service_labelpresent_giantswarm_io_monitoring] + regex: .*(true).* + action: drop + # Add namespace label. + - source_labels: [__meta_kubernetes_namespace] + target_label: namespace + # Add pod label. + - source_labels: [__meta_kubernetes_pod_name] + target_label: pod + # Add container label. + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: container + # Add node label. + - source_labels: [__meta_kubernetes_pod_node_name] + target_label: node + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: capa + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo +- job_name: eks-sample-prometheus/workload-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: endpoints + api_server: https://master.eks-sample:443 + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: true + relabel_configs: + - source_labels: [__meta_kubernetes_service_annotationpresent_giantswarm_io_monitoring, __meta_kubernetes_service_labelpresent_giantswarm_io_monitoring] + regex: .*(true).* + action: keep + # if __meta_kubernetes_service_annotation_giantswarm_io_monitoring_path is present, we use it as the metrics path + - source_labels: [__meta_kubernetes_service_annotation_giantswarm_io_monitoring_path] + action: replace + target_label: __metrics_path__ + regex: (.+) + # if __meta_kubernetes_service_annotation_giantswarm_io_monitoring_port, we use it as the metrics port + - source_labels: [__address__, __meta_kubernetes_service_annotation_giantswarm_io_monitoring_port] + action: replace + target_label: __address__ + regex: ([^:]+):(\d+);(\d+) + replacement: $1:$3 + # if the protocol is empty, we set it to http by default, this allows to override the protocol for services using https like prometheus operator + - source_labels: [__address__, __meta_kubernetes_service_annotation_giantswarm_io_monitoring_protocol] + action: replace + target_label: __meta_kubernetes_service_annotation_giantswarm_io_monitoring_protocol + regex: (.*); + replacement: "http" + - source_labels: [__meta_kubernetes_pod_ip, __address__] + regex: (.*);([^:]+):(\d+) + replacement: $1:$3 + target_label: instance + - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_annotation_giantswarm_io_monitoring_protocol, __meta_kubernetes_pod_name, __address__, __metrics_path__] + regex: (.*);(.*);(.*);(.+:)(\d+);(.*) + target_label: __metrics_path__ + replacement: /api/v1/namespaces/${1}/pods/${2}:${3}:${5}/proxy${6} + action: replace + - regex: (.*) + target_label: __address__ + replacement: master.eks-sample:443 + action: replace + - source_labels: [__meta_kubernetes_service_name] + regex: (.*) + target_label: app + action: replace + - source_labels: [__meta_kubernetes_service_annotation_giantswarm_io_monitoring_app_label] + regex: (.+) + target_label: app + action: replace + # Add namespace label. + - source_labels: [__meta_kubernetes_namespace] + target_label: namespace + # Add pod label. + - source_labels: [__meta_kubernetes_pod_name] + target_label: pod + # Add container label. + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: container + # Add node label. + - source_labels: [__meta_kubernetes_pod_node_name] + target_label: node + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: capa + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo + metric_relabel_configs: + - source_labels: [container] + regex: prometheus-operator-app + action: drop + - source_labels: [app] + regex: coredns + action: drop + - source_labels: [app] + regex: kube-state-metrics + action: drop + # drop unused nginx metrics with the highest cardinality as they increase Prometheus memory usage + - source_labels: [__name__] + regex: nginx_ingress_controller_(bytes_sent_bucket|request_size_bucket|response_duration_seconds_bucket|response_size_bucket|request_duration_seconds_count|connect_duration_seconds_bucket|header_duration_seconds_bucket|bytes_sent_count|request_duration_seconds_sum|bytes_sent_sum|request_size_count|response_size_count|response_duration_seconds_sum|response_duration_seconds_count|ingress_upstream_latency_seconds|ingress_upstream_latency_seconds_sum|ingress_upstream_latency_seconds_count) + action: drop + # drop unused kong metrics with the highest cardinality as they increase Prometheus memory usage + - source_labels: [__name__] + regex: kong_(upstream_target_health|latency_bucket|latency_count|latency_sum) + action: drop + # drop unused kube-state-metrics metrics with the highest cardinality as they increase Prometheus memory usage + - source_labels: [__name__] + regex: kube_(.+_annotations|secret_type|pod_status_qos_class|pod_tolerations|pod_status_scheduled|replicaset_metadata_generation|replicaset_status_observed_generation|replicaset_annotations|replicaset_status_fully_labeled_replicas|.+_metadata_resource_version) + action: drop + # drop unused promtail/loki metrics + - source_labels: [__name__] + regex: promtail_request_duration_seconds_bucket|loki_request_duration_seconds_bucket + action: drop + # drop unused rest client metrics + - source_labels: [__name__] + regex: rest_client_(rate_limiter_duration_seconds_bucket|request_size_bytes_bucket|response_size_bytes_bucket) + action: drop + # drop image_id label from kube-state-metrics + - source_labels: [app,image_id] + separator: ; + regex: kube-state-metrics;(.+) + replacement: "" + action: replace + target_label: image_id + - source_labels: [app,deployment] + separator: ; + regex: kube-state-metrics;(.+) + target_label: workload_type + replacement: deployment + action: replace + - source_labels: [app,daemonset] + separator: ; + regex: kube-state-metrics;(.+) + target_label: workload_type + replacement: daemonset + action: replace + - source_labels: [app,statefulset] + separator: ; + regex: kube-state-metrics;(.+) + target_label: workload_type + replacement: statefulset + action: replace + - source_labels: [app,deployment] + separator: ; + regex: kube-state-metrics;(.+) + target_label: workload_name + replacement: ${1} + action: replace + - source_labels: [app,daemonset] + separator: ; + regex: kube-state-metrics;(.+) + target_label: workload_name + replacement: ${1} + action: replace + - source_labels: [app,statefulset] + separator: ; + regex: kube-state-metrics;(.+) + target_label: workload_name + replacement: ${1} + action: replace + - source_labels: [app,label_topology_kubernetes_io_region] + separator: ; + regex: kube-state-metrics;(.+) + target_label: region + replacement: ${1} + action: replace + - source_labels: [app,label_topology_kubernetes_io_zone] + separator: ; + regex: kube-state-metrics;(.+) + target_label: zone + replacement: ${1} + action: replace + - action: labeldrop + regex: label_topology_kubernetes_io_region|label_topology_kubernetes_io_zone + # Override with label for AWS clusters if exists. + - source_labels: [app,label_giantswarm_io_machine_deployment] + regex: kube-state-metrics;(.+) + target_label: nodepool + replacement: ${1} + action: replace + # Override with label for Azure clusters if exists. + - source_labels: [app,label_giantswarm_io_machine_pool] + regex: kube-state-metrics;(.+) + target_label: nodepool + replacement: ${1} + action: replace + - action: labeldrop + regex: label_giantswarm_io_machine_pool|label_giantswarm_io_machine_deployment +# prometheus +- job_name: eks-sample-prometheus/prometheus-eks-sample/0 + honor_labels: true + scheme: http + metrics_path: /eks-sample/metrics + static_configs: + - targets: ['localhost:9090'] + relabel_configs: + - replacement: prometheus + target_label: app + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: capa + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo diff --git a/service/controller/resource/monitoring/scrapeconfigs/test/gcp/case-6-cluster-api-eks.golden b/service/controller/resource/monitoring/scrapeconfigs/test/gcp/case-6-cluster-api-eks.golden new file mode 100644 index 000000000..b6a2fd2c8 --- /dev/null +++ b/service/controller/resource/monitoring/scrapeconfigs/test/gcp/case-6-cluster-api-eks.golden @@ -0,0 +1,444 @@ + +# calico-node +- job_name: eks-sample-prometheus/calico-node-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: pod + api_server: https://master.eks-sample:443 + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + relabel_configs: + - source_labels: [__address__] + replacement: ${1}:9091 + target_label: instance + - target_label: __address__ + replacement: master.eks-sample:443 + - source_labels: [__meta_kubernetes_pod_name] + regex: (calico-node.*) + target_label: __metrics_path__ + replacement: /api/v1/namespaces/kube-system/pods/${1}:9091/proxy/metrics + - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_pod_name] + regex: kube-system;calico-node.* + action: keep + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: app + # Add namespace label. + - source_labels: [__meta_kubernetes_namespace] + target_label: namespace + # Add pod label. + - source_labels: [__meta_kubernetes_pod_name] + target_label: pod + # Add container label. + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: container + # Add node label. + - source_labels: [__meta_kubernetes_pod_node_name] + target_label: node + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: gcp + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo +# kube-proxy +- job_name: eks-sample-prometheus/kube-proxy-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: pod + namespaces: + names: + - kube-system + api_server: https://master.eks-sample:443 + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: true + relabel_configs: + - source_labels: [__address__] + replacement: $1:10249 + target_label: instance + - source_labels: [__meta_kubernetes_pod_name] + regex: (kube-proxy.*) + action: keep + - target_label: __address__ + replacement: master.eks-sample:443 + - source_labels: [__meta_kubernetes_pod_name] + regex: (kube-proxy.*) + target_label: __metrics_path__ + replacement: /api/v1/namespaces/kube-system/pods/${1}:10249/proxy/metrics + - target_label: app + replacement: kube-proxy + # Add namespace label. + - source_labels: [__meta_kubernetes_namespace] + target_label: namespace + # Add pod label. + - source_labels: [__meta_kubernetes_pod_name] + target_label: pod + # Add container label. + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: container + # Add node label. + - source_labels: [__meta_kubernetes_pod_node_name] + target_label: node + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: gcp + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo + metric_relabel_configs: + # drop unused rest client metrics + - source_labels: [__name__] + regex: rest_client_(rate_limiter_duration_seconds_bucket|request_size_bytes_bucket|response_size_bytes_bucket) + action: drop +# cert-exporter +- job_name: eks-sample-prometheus/cert-exporter-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: endpoints + namespaces: + names: + - kube-system + api_server: https://master.eks-sample:443 + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: true + relabel_configs: + - source_labels: [__address__] + target_label: instance + - source_labels: [__meta_kubernetes_service_label_app] + regex: cert-exporter + action: keep + - target_label: __address__ + replacement: master.eks-sample:443 + - source_labels: [__meta_kubernetes_pod_name] + regex: (cert-exporter.*) + target_label: __metrics_path__ + replacement: /api/v1/namespaces/kube-system/pods/${1}:9005/proxy/metrics + - source_labels: [__meta_kubernetes_service_label_app] + target_label: app + - source_labels: [__meta_kubernetes_service_annotationpresent_giantswarm_io_monitoring, __meta_kubernetes_service_labelpresent_giantswarm_io_monitoring] + regex: .*(true).* + action: drop + # Add namespace label. + - source_labels: [__meta_kubernetes_namespace] + target_label: namespace + # Add pod label. + - source_labels: [__meta_kubernetes_pod_name] + target_label: pod + # Add container label. + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: container + # Add node label. + - source_labels: [__meta_kubernetes_pod_node_name] + target_label: node + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: gcp + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo +- job_name: eks-sample-prometheus/workload-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: endpoints + api_server: https://master.eks-sample:443 + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: true + relabel_configs: + - source_labels: [__meta_kubernetes_service_annotationpresent_giantswarm_io_monitoring, __meta_kubernetes_service_labelpresent_giantswarm_io_monitoring] + regex: .*(true).* + action: keep + # if __meta_kubernetes_service_annotation_giantswarm_io_monitoring_path is present, we use it as the metrics path + - source_labels: [__meta_kubernetes_service_annotation_giantswarm_io_monitoring_path] + action: replace + target_label: __metrics_path__ + regex: (.+) + # if __meta_kubernetes_service_annotation_giantswarm_io_monitoring_port, we use it as the metrics port + - source_labels: [__address__, __meta_kubernetes_service_annotation_giantswarm_io_monitoring_port] + action: replace + target_label: __address__ + regex: ([^:]+):(\d+);(\d+) + replacement: $1:$3 + # if the protocol is empty, we set it to http by default, this allows to override the protocol for services using https like prometheus operator + - source_labels: [__address__, __meta_kubernetes_service_annotation_giantswarm_io_monitoring_protocol] + action: replace + target_label: __meta_kubernetes_service_annotation_giantswarm_io_monitoring_protocol + regex: (.*); + replacement: "http" + - source_labels: [__meta_kubernetes_pod_ip, __address__] + regex: (.*);([^:]+):(\d+) + replacement: $1:$3 + target_label: instance + - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_annotation_giantswarm_io_monitoring_protocol, __meta_kubernetes_pod_name, __address__, __metrics_path__] + regex: (.*);(.*);(.*);(.+:)(\d+);(.*) + target_label: __metrics_path__ + replacement: /api/v1/namespaces/${1}/pods/${2}:${3}:${5}/proxy${6} + action: replace + - regex: (.*) + target_label: __address__ + replacement: master.eks-sample:443 + action: replace + - source_labels: [__meta_kubernetes_service_name] + regex: (.*) + target_label: app + action: replace + - source_labels: [__meta_kubernetes_service_annotation_giantswarm_io_monitoring_app_label] + regex: (.+) + target_label: app + action: replace + # Add namespace label. + - source_labels: [__meta_kubernetes_namespace] + target_label: namespace + # Add pod label. + - source_labels: [__meta_kubernetes_pod_name] + target_label: pod + # Add container label. + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: container + # Add node label. + - source_labels: [__meta_kubernetes_pod_node_name] + target_label: node + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: gcp + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo + metric_relabel_configs: + - source_labels: [container] + regex: prometheus-operator-app + action: drop + - source_labels: [app] + regex: coredns + action: drop + - source_labels: [app] + regex: kube-state-metrics + action: drop + # drop unused nginx metrics with the highest cardinality as they increase Prometheus memory usage + - source_labels: [__name__] + regex: nginx_ingress_controller_(bytes_sent_bucket|request_size_bucket|response_duration_seconds_bucket|response_size_bucket|request_duration_seconds_count|connect_duration_seconds_bucket|header_duration_seconds_bucket|bytes_sent_count|request_duration_seconds_sum|bytes_sent_sum|request_size_count|response_size_count|response_duration_seconds_sum|response_duration_seconds_count|ingress_upstream_latency_seconds|ingress_upstream_latency_seconds_sum|ingress_upstream_latency_seconds_count) + action: drop + # drop unused kong metrics with the highest cardinality as they increase Prometheus memory usage + - source_labels: [__name__] + regex: kong_(upstream_target_health|latency_bucket|latency_count|latency_sum) + action: drop + # drop unused kube-state-metrics metrics with the highest cardinality as they increase Prometheus memory usage + - source_labels: [__name__] + regex: kube_(.+_annotations|secret_type|pod_status_qos_class|pod_tolerations|pod_status_scheduled|replicaset_metadata_generation|replicaset_status_observed_generation|replicaset_annotations|replicaset_status_fully_labeled_replicas|.+_metadata_resource_version) + action: drop + # drop unused promtail/loki metrics + - source_labels: [__name__] + regex: promtail_request_duration_seconds_bucket|loki_request_duration_seconds_bucket + action: drop + # drop unused rest client metrics + - source_labels: [__name__] + regex: rest_client_(rate_limiter_duration_seconds_bucket|request_size_bytes_bucket|response_size_bytes_bucket) + action: drop + # drop image_id label from kube-state-metrics + - source_labels: [app,image_id] + separator: ; + regex: kube-state-metrics;(.+) + replacement: "" + action: replace + target_label: image_id + - source_labels: [app,deployment] + separator: ; + regex: kube-state-metrics;(.+) + target_label: workload_type + replacement: deployment + action: replace + - source_labels: [app,daemonset] + separator: ; + regex: kube-state-metrics;(.+) + target_label: workload_type + replacement: daemonset + action: replace + - source_labels: [app,statefulset] + separator: ; + regex: kube-state-metrics;(.+) + target_label: workload_type + replacement: statefulset + action: replace + - source_labels: [app,deployment] + separator: ; + regex: kube-state-metrics;(.+) + target_label: workload_name + replacement: ${1} + action: replace + - source_labels: [app,daemonset] + separator: ; + regex: kube-state-metrics;(.+) + target_label: workload_name + replacement: ${1} + action: replace + - source_labels: [app,statefulset] + separator: ; + regex: kube-state-metrics;(.+) + target_label: workload_name + replacement: ${1} + action: replace + - source_labels: [app,label_topology_kubernetes_io_region] + separator: ; + regex: kube-state-metrics;(.+) + target_label: region + replacement: ${1} + action: replace + - source_labels: [app,label_topology_kubernetes_io_zone] + separator: ; + regex: kube-state-metrics;(.+) + target_label: zone + replacement: ${1} + action: replace + - action: labeldrop + regex: label_topology_kubernetes_io_region|label_topology_kubernetes_io_zone + # Override with label for AWS clusters if exists. + - source_labels: [app,label_giantswarm_io_machine_deployment] + regex: kube-state-metrics;(.+) + target_label: nodepool + replacement: ${1} + action: replace + # Override with label for Azure clusters if exists. + - source_labels: [app,label_giantswarm_io_machine_pool] + regex: kube-state-metrics;(.+) + target_label: nodepool + replacement: ${1} + action: replace + - action: labeldrop + regex: label_giantswarm_io_machine_pool|label_giantswarm_io_machine_deployment +# prometheus +- job_name: eks-sample-prometheus/prometheus-eks-sample/0 + honor_labels: true + scheme: http + metrics_path: /eks-sample/metrics + static_configs: + - targets: ['localhost:9090'] + relabel_configs: + - replacement: prometheus + target_label: app + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: gcp + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo diff --git a/service/controller/resource/monitoring/scrapeconfigs/test/kvm/case-6-cluster-api-eks.golden b/service/controller/resource/monitoring/scrapeconfigs/test/kvm/case-6-cluster-api-eks.golden new file mode 100644 index 000000000..8fd41662d --- /dev/null +++ b/service/controller/resource/monitoring/scrapeconfigs/test/kvm/case-6-cluster-api-eks.golden @@ -0,0 +1,500 @@ + +# Add scrape configuration for docker +- job_name: eks-sample-prometheus/docker-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: node + api_server: https://master.eks-sample:443 + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + relabel_configs: + - target_label: __address__ + replacement: master.eks-sample:443 + - source_labels: [__meta_kubernetes_node_name] + target_label: __metrics_path__ + replacement: /api/v1/nodes/${1}:9323/proxy/metrics + - target_label: app + replacement: docker + - source_labels: [__meta_kubernetes_node_address_InternalIP] + replacement: ${1}:9323 + target_label: instance + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: kvm + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role + metric_relabel_configs: + - source_labels: [__name__] + regex: (engine_daemon_image_actions_seconds_count|process_virtual_memory_bytes|process_resident_memory_bytes) + action: keep +# calico-node +- job_name: eks-sample-prometheus/calico-node-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: pod + api_server: https://master.eks-sample:443 + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + relabel_configs: + - source_labels: [__address__] + replacement: ${1}:9091 + target_label: instance + - target_label: __address__ + replacement: master.eks-sample:443 + - source_labels: [__meta_kubernetes_pod_name] + regex: (calico-node.*) + target_label: __metrics_path__ + replacement: /api/v1/namespaces/kube-system/pods/${1}:9091/proxy/metrics + - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_pod_name] + regex: kube-system;calico-node.* + action: keep + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: app + # Add namespace label. + - source_labels: [__meta_kubernetes_namespace] + target_label: namespace + # Add pod label. + - source_labels: [__meta_kubernetes_pod_name] + target_label: pod + # Add container label. + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: container + # Add node label. + - source_labels: [__meta_kubernetes_pod_node_name] + target_label: node + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: kvm + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo +# kube-proxy +- job_name: eks-sample-prometheus/kube-proxy-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: pod + namespaces: + names: + - kube-system + api_server: https://master.eks-sample:443 + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: true + relabel_configs: + - source_labels: [__address__] + replacement: $1:10249 + target_label: instance + - source_labels: [__meta_kubernetes_pod_name] + regex: (kube-proxy.*) + action: keep + - target_label: __address__ + replacement: master.eks-sample:443 + - source_labels: [__meta_kubernetes_pod_name] + regex: (kube-proxy.*) + target_label: __metrics_path__ + replacement: /api/v1/namespaces/kube-system/pods/${1}:10249/proxy/metrics + - target_label: app + replacement: kube-proxy + # Add namespace label. + - source_labels: [__meta_kubernetes_namespace] + target_label: namespace + # Add pod label. + - source_labels: [__meta_kubernetes_pod_name] + target_label: pod + # Add container label. + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: container + # Add node label. + - source_labels: [__meta_kubernetes_pod_node_name] + target_label: node + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: kvm + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo + metric_relabel_configs: + # drop unused rest client metrics + - source_labels: [__name__] + regex: rest_client_(rate_limiter_duration_seconds_bucket|request_size_bytes_bucket|response_size_bytes_bucket) + action: drop +# cert-exporter +- job_name: eks-sample-prometheus/cert-exporter-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: endpoints + namespaces: + names: + - kube-system + api_server: https://master.eks-sample:443 + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: true + relabel_configs: + - source_labels: [__address__] + target_label: instance + - source_labels: [__meta_kubernetes_service_label_app] + regex: cert-exporter + action: keep + - target_label: __address__ + replacement: master.eks-sample:443 + - source_labels: [__meta_kubernetes_pod_name] + regex: (cert-exporter.*) + target_label: __metrics_path__ + replacement: /api/v1/namespaces/kube-system/pods/${1}:9005/proxy/metrics + - source_labels: [__meta_kubernetes_service_label_app] + target_label: app + - source_labels: [__meta_kubernetes_service_annotationpresent_giantswarm_io_monitoring, __meta_kubernetes_service_labelpresent_giantswarm_io_monitoring] + regex: .*(true).* + action: drop + # Add namespace label. + - source_labels: [__meta_kubernetes_namespace] + target_label: namespace + # Add pod label. + - source_labels: [__meta_kubernetes_pod_name] + target_label: pod + # Add container label. + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: container + # Add node label. + - source_labels: [__meta_kubernetes_pod_node_name] + target_label: node + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: kvm + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo +- job_name: eks-sample-prometheus/workload-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: endpoints + api_server: https://master.eks-sample:443 + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: true + relabel_configs: + - source_labels: [__meta_kubernetes_service_annotationpresent_giantswarm_io_monitoring, __meta_kubernetes_service_labelpresent_giantswarm_io_monitoring] + regex: .*(true).* + action: keep + # if __meta_kubernetes_service_annotation_giantswarm_io_monitoring_path is present, we use it as the metrics path + - source_labels: [__meta_kubernetes_service_annotation_giantswarm_io_monitoring_path] + action: replace + target_label: __metrics_path__ + regex: (.+) + # if __meta_kubernetes_service_annotation_giantswarm_io_monitoring_port, we use it as the metrics port + - source_labels: [__address__, __meta_kubernetes_service_annotation_giantswarm_io_monitoring_port] + action: replace + target_label: __address__ + regex: ([^:]+):(\d+);(\d+) + replacement: $1:$3 + # if the protocol is empty, we set it to http by default, this allows to override the protocol for services using https like prometheus operator + - source_labels: [__address__, __meta_kubernetes_service_annotation_giantswarm_io_monitoring_protocol] + action: replace + target_label: __meta_kubernetes_service_annotation_giantswarm_io_monitoring_protocol + regex: (.*); + replacement: "http" + - source_labels: [__meta_kubernetes_pod_ip, __address__] + regex: (.*);([^:]+):(\d+) + replacement: $1:$3 + target_label: instance + - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_annotation_giantswarm_io_monitoring_protocol, __meta_kubernetes_pod_name, __address__, __metrics_path__] + regex: (.*);(.*);(.*);(.+:)(\d+);(.*) + target_label: __metrics_path__ + replacement: /api/v1/namespaces/${1}/pods/${2}:${3}:${5}/proxy${6} + action: replace + - regex: (.*) + target_label: __address__ + replacement: master.eks-sample:443 + action: replace + - source_labels: [__meta_kubernetes_service_name] + regex: (.*) + target_label: app + action: replace + - source_labels: [__meta_kubernetes_service_annotation_giantswarm_io_monitoring_app_label] + regex: (.+) + target_label: app + action: replace + # Add namespace label. + - source_labels: [__meta_kubernetes_namespace] + target_label: namespace + # Add pod label. + - source_labels: [__meta_kubernetes_pod_name] + target_label: pod + # Add container label. + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: container + # Add node label. + - source_labels: [__meta_kubernetes_pod_node_name] + target_label: node + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: kvm + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo + metric_relabel_configs: + - source_labels: [container] + regex: prometheus-operator-app + action: drop + - source_labels: [app] + regex: coredns + action: drop + - source_labels: [app] + regex: kube-state-metrics + action: drop + # drop unused nginx metrics with the highest cardinality as they increase Prometheus memory usage + - source_labels: [__name__] + regex: nginx_ingress_controller_(bytes_sent_bucket|request_size_bucket|response_duration_seconds_bucket|response_size_bucket|request_duration_seconds_count|connect_duration_seconds_bucket|header_duration_seconds_bucket|bytes_sent_count|request_duration_seconds_sum|bytes_sent_sum|request_size_count|response_size_count|response_duration_seconds_sum|response_duration_seconds_count|ingress_upstream_latency_seconds|ingress_upstream_latency_seconds_sum|ingress_upstream_latency_seconds_count) + action: drop + # drop unused kong metrics with the highest cardinality as they increase Prometheus memory usage + - source_labels: [__name__] + regex: kong_(upstream_target_health|latency_bucket|latency_count|latency_sum) + action: drop + # drop unused kube-state-metrics metrics with the highest cardinality as they increase Prometheus memory usage + - source_labels: [__name__] + regex: kube_(.+_annotations|secret_type|pod_status_qos_class|pod_tolerations|pod_status_scheduled|replicaset_metadata_generation|replicaset_status_observed_generation|replicaset_annotations|replicaset_status_fully_labeled_replicas|.+_metadata_resource_version) + action: drop + # drop unused promtail/loki metrics + - source_labels: [__name__] + regex: promtail_request_duration_seconds_bucket|loki_request_duration_seconds_bucket + action: drop + # drop unused rest client metrics + - source_labels: [__name__] + regex: rest_client_(rate_limiter_duration_seconds_bucket|request_size_bytes_bucket|response_size_bytes_bucket) + action: drop + # drop image_id label from kube-state-metrics + - source_labels: [app,image_id] + separator: ; + regex: kube-state-metrics;(.+) + replacement: "" + action: replace + target_label: image_id + - source_labels: [app,deployment] + separator: ; + regex: kube-state-metrics;(.+) + target_label: workload_type + replacement: deployment + action: replace + - source_labels: [app,daemonset] + separator: ; + regex: kube-state-metrics;(.+) + target_label: workload_type + replacement: daemonset + action: replace + - source_labels: [app,statefulset] + separator: ; + regex: kube-state-metrics;(.+) + target_label: workload_type + replacement: statefulset + action: replace + - source_labels: [app,deployment] + separator: ; + regex: kube-state-metrics;(.+) + target_label: workload_name + replacement: ${1} + action: replace + - source_labels: [app,daemonset] + separator: ; + regex: kube-state-metrics;(.+) + target_label: workload_name + replacement: ${1} + action: replace + - source_labels: [app,statefulset] + separator: ; + regex: kube-state-metrics;(.+) + target_label: workload_name + replacement: ${1} + action: replace + - source_labels: [app,label_topology_kubernetes_io_region] + separator: ; + regex: kube-state-metrics;(.+) + target_label: region + replacement: ${1} + action: replace + - source_labels: [app,label_topology_kubernetes_io_zone] + separator: ; + regex: kube-state-metrics;(.+) + target_label: zone + replacement: ${1} + action: replace + - action: labeldrop + regex: label_topology_kubernetes_io_region|label_topology_kubernetes_io_zone + # Override with label for AWS clusters if exists. + - source_labels: [app,label_giantswarm_io_machine_deployment] + regex: kube-state-metrics;(.+) + target_label: nodepool + replacement: ${1} + action: replace + # Override with label for Azure clusters if exists. + - source_labels: [app,label_giantswarm_io_machine_pool] + regex: kube-state-metrics;(.+) + target_label: nodepool + replacement: ${1} + action: replace + - action: labeldrop + regex: label_giantswarm_io_machine_pool|label_giantswarm_io_machine_deployment +# prometheus +- job_name: eks-sample-prometheus/prometheus-eks-sample/0 + honor_labels: true + scheme: http + metrics_path: /eks-sample/metrics + static_configs: + - targets: ['localhost:9090'] + relabel_configs: + - replacement: prometheus + target_label: app + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: kvm + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo diff --git a/service/controller/resource/monitoring/scrapeconfigs/test/openstack/case-6-cluster-api-eks.golden b/service/controller/resource/monitoring/scrapeconfigs/test/openstack/case-6-cluster-api-eks.golden new file mode 100644 index 000000000..60b981f67 --- /dev/null +++ b/service/controller/resource/monitoring/scrapeconfigs/test/openstack/case-6-cluster-api-eks.golden @@ -0,0 +1,444 @@ + +# calico-node +- job_name: eks-sample-prometheus/calico-node-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: pod + api_server: https://master.eks-sample:443 + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + relabel_configs: + - source_labels: [__address__] + replacement: ${1}:9091 + target_label: instance + - target_label: __address__ + replacement: master.eks-sample:443 + - source_labels: [__meta_kubernetes_pod_name] + regex: (calico-node.*) + target_label: __metrics_path__ + replacement: /api/v1/namespaces/kube-system/pods/${1}:9091/proxy/metrics + - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_pod_name] + regex: kube-system;calico-node.* + action: keep + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: app + # Add namespace label. + - source_labels: [__meta_kubernetes_namespace] + target_label: namespace + # Add pod label. + - source_labels: [__meta_kubernetes_pod_name] + target_label: pod + # Add container label. + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: container + # Add node label. + - source_labels: [__meta_kubernetes_pod_node_name] + target_label: node + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: openstack + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo +# kube-proxy +- job_name: eks-sample-prometheus/kube-proxy-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: pod + namespaces: + names: + - kube-system + api_server: https://master.eks-sample:443 + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: true + relabel_configs: + - source_labels: [__address__] + replacement: $1:10249 + target_label: instance + - source_labels: [__meta_kubernetes_pod_name] + regex: (kube-proxy.*) + action: keep + - target_label: __address__ + replacement: master.eks-sample:443 + - source_labels: [__meta_kubernetes_pod_name] + regex: (kube-proxy.*) + target_label: __metrics_path__ + replacement: /api/v1/namespaces/kube-system/pods/${1}:10249/proxy/metrics + - target_label: app + replacement: kube-proxy + # Add namespace label. + - source_labels: [__meta_kubernetes_namespace] + target_label: namespace + # Add pod label. + - source_labels: [__meta_kubernetes_pod_name] + target_label: pod + # Add container label. + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: container + # Add node label. + - source_labels: [__meta_kubernetes_pod_node_name] + target_label: node + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: openstack + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo + metric_relabel_configs: + # drop unused rest client metrics + - source_labels: [__name__] + regex: rest_client_(rate_limiter_duration_seconds_bucket|request_size_bytes_bucket|response_size_bytes_bucket) + action: drop +# cert-exporter +- job_name: eks-sample-prometheus/cert-exporter-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: endpoints + namespaces: + names: + - kube-system + api_server: https://master.eks-sample:443 + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: true + relabel_configs: + - source_labels: [__address__] + target_label: instance + - source_labels: [__meta_kubernetes_service_label_app] + regex: cert-exporter + action: keep + - target_label: __address__ + replacement: master.eks-sample:443 + - source_labels: [__meta_kubernetes_pod_name] + regex: (cert-exporter.*) + target_label: __metrics_path__ + replacement: /api/v1/namespaces/kube-system/pods/${1}:9005/proxy/metrics + - source_labels: [__meta_kubernetes_service_label_app] + target_label: app + - source_labels: [__meta_kubernetes_service_annotationpresent_giantswarm_io_monitoring, __meta_kubernetes_service_labelpresent_giantswarm_io_monitoring] + regex: .*(true).* + action: drop + # Add namespace label. + - source_labels: [__meta_kubernetes_namespace] + target_label: namespace + # Add pod label. + - source_labels: [__meta_kubernetes_pod_name] + target_label: pod + # Add container label. + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: container + # Add node label. + - source_labels: [__meta_kubernetes_pod_node_name] + target_label: node + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: openstack + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo +- job_name: eks-sample-prometheus/workload-eks-sample/0 + honor_labels: true + scheme: https + kubernetes_sd_configs: + - role: endpoints + api_server: https://master.eks-sample:443 + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: false + tls_config: + ca_file: /etc/prometheus/secrets/cluster-certificates/ca + cert_file: /etc/prometheus/secrets/cluster-certificates/crt + key_file: /etc/prometheus/secrets/cluster-certificates/key + insecure_skip_verify: true + relabel_configs: + - source_labels: [__meta_kubernetes_service_annotationpresent_giantswarm_io_monitoring, __meta_kubernetes_service_labelpresent_giantswarm_io_monitoring] + regex: .*(true).* + action: keep + # if __meta_kubernetes_service_annotation_giantswarm_io_monitoring_path is present, we use it as the metrics path + - source_labels: [__meta_kubernetes_service_annotation_giantswarm_io_monitoring_path] + action: replace + target_label: __metrics_path__ + regex: (.+) + # if __meta_kubernetes_service_annotation_giantswarm_io_monitoring_port, we use it as the metrics port + - source_labels: [__address__, __meta_kubernetes_service_annotation_giantswarm_io_monitoring_port] + action: replace + target_label: __address__ + regex: ([^:]+):(\d+);(\d+) + replacement: $1:$3 + # if the protocol is empty, we set it to http by default, this allows to override the protocol for services using https like prometheus operator + - source_labels: [__address__, __meta_kubernetes_service_annotation_giantswarm_io_monitoring_protocol] + action: replace + target_label: __meta_kubernetes_service_annotation_giantswarm_io_monitoring_protocol + regex: (.*); + replacement: "http" + - source_labels: [__meta_kubernetes_pod_ip, __address__] + regex: (.*);([^:]+):(\d+) + replacement: $1:$3 + target_label: instance + - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_annotation_giantswarm_io_monitoring_protocol, __meta_kubernetes_pod_name, __address__, __metrics_path__] + regex: (.*);(.*);(.*);(.+:)(\d+);(.*) + target_label: __metrics_path__ + replacement: /api/v1/namespaces/${1}/pods/${2}:${3}:${5}/proxy${6} + action: replace + - regex: (.*) + target_label: __address__ + replacement: master.eks-sample:443 + action: replace + - source_labels: [__meta_kubernetes_service_name] + regex: (.*) + target_label: app + action: replace + - source_labels: [__meta_kubernetes_service_annotation_giantswarm_io_monitoring_app_label] + regex: (.+) + target_label: app + action: replace + # Add namespace label. + - source_labels: [__meta_kubernetes_namespace] + target_label: namespace + # Add pod label. + - source_labels: [__meta_kubernetes_pod_name] + target_label: pod + # Add container label. + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: container + # Add node label. + - source_labels: [__meta_kubernetes_pod_node_name] + target_label: node + # Add role label. + - source_labels: [__meta_kubernetes_node_label_role] + target_label: role + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: openstack + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo + metric_relabel_configs: + - source_labels: [container] + regex: prometheus-operator-app + action: drop + - source_labels: [app] + regex: coredns + action: drop + - source_labels: [app] + regex: kube-state-metrics + action: drop + # drop unused nginx metrics with the highest cardinality as they increase Prometheus memory usage + - source_labels: [__name__] + regex: nginx_ingress_controller_(bytes_sent_bucket|request_size_bucket|response_duration_seconds_bucket|response_size_bucket|request_duration_seconds_count|connect_duration_seconds_bucket|header_duration_seconds_bucket|bytes_sent_count|request_duration_seconds_sum|bytes_sent_sum|request_size_count|response_size_count|response_duration_seconds_sum|response_duration_seconds_count|ingress_upstream_latency_seconds|ingress_upstream_latency_seconds_sum|ingress_upstream_latency_seconds_count) + action: drop + # drop unused kong metrics with the highest cardinality as they increase Prometheus memory usage + - source_labels: [__name__] + regex: kong_(upstream_target_health|latency_bucket|latency_count|latency_sum) + action: drop + # drop unused kube-state-metrics metrics with the highest cardinality as they increase Prometheus memory usage + - source_labels: [__name__] + regex: kube_(.+_annotations|secret_type|pod_status_qos_class|pod_tolerations|pod_status_scheduled|replicaset_metadata_generation|replicaset_status_observed_generation|replicaset_annotations|replicaset_status_fully_labeled_replicas|.+_metadata_resource_version) + action: drop + # drop unused promtail/loki metrics + - source_labels: [__name__] + regex: promtail_request_duration_seconds_bucket|loki_request_duration_seconds_bucket + action: drop + # drop unused rest client metrics + - source_labels: [__name__] + regex: rest_client_(rate_limiter_duration_seconds_bucket|request_size_bytes_bucket|response_size_bytes_bucket) + action: drop + # drop image_id label from kube-state-metrics + - source_labels: [app,image_id] + separator: ; + regex: kube-state-metrics;(.+) + replacement: "" + action: replace + target_label: image_id + - source_labels: [app,deployment] + separator: ; + regex: kube-state-metrics;(.+) + target_label: workload_type + replacement: deployment + action: replace + - source_labels: [app,daemonset] + separator: ; + regex: kube-state-metrics;(.+) + target_label: workload_type + replacement: daemonset + action: replace + - source_labels: [app,statefulset] + separator: ; + regex: kube-state-metrics;(.+) + target_label: workload_type + replacement: statefulset + action: replace + - source_labels: [app,deployment] + separator: ; + regex: kube-state-metrics;(.+) + target_label: workload_name + replacement: ${1} + action: replace + - source_labels: [app,daemonset] + separator: ; + regex: kube-state-metrics;(.+) + target_label: workload_name + replacement: ${1} + action: replace + - source_labels: [app,statefulset] + separator: ; + regex: kube-state-metrics;(.+) + target_label: workload_name + replacement: ${1} + action: replace + - source_labels: [app,label_topology_kubernetes_io_region] + separator: ; + regex: kube-state-metrics;(.+) + target_label: region + replacement: ${1} + action: replace + - source_labels: [app,label_topology_kubernetes_io_zone] + separator: ; + regex: kube-state-metrics;(.+) + target_label: zone + replacement: ${1} + action: replace + - action: labeldrop + regex: label_topology_kubernetes_io_region|label_topology_kubernetes_io_zone + # Override with label for AWS clusters if exists. + - source_labels: [app,label_giantswarm_io_machine_deployment] + regex: kube-state-metrics;(.+) + target_label: nodepool + replacement: ${1} + action: replace + # Override with label for Azure clusters if exists. + - source_labels: [app,label_giantswarm_io_machine_pool] + regex: kube-state-metrics;(.+) + target_label: nodepool + replacement: ${1} + action: replace + - action: labeldrop + regex: label_giantswarm_io_machine_pool|label_giantswarm_io_machine_deployment +# prometheus +- job_name: eks-sample-prometheus/prometheus-eks-sample/0 + honor_labels: true + scheme: http + metrics_path: /eks-sample/metrics + static_configs: + - targets: ['localhost:9090'] + relabel_configs: + - replacement: prometheus + target_label: app + # Add cluster_id label. + - target_label: cluster_id + replacement: eks-sample + # Add cluster_type label. + - target_label: cluster_type + replacement: workload_cluster + # Add provider label. + - target_label: provider + replacement: openstack + # Add installation label. + - target_label: installation + replacement: test-installation + # Add priority label. + - target_label: service_priority + replacement: highest + # Add organization label. + - target_label: organization + replacement: my-organization + # Add customer label. + - target_label: customer + replacement: pmo diff --git a/service/controller/resource/monitoring/verticalpodautoscaler/test/case-6-cluster-api-eks.golden b/service/controller/resource/monitoring/verticalpodautoscaler/test/case-6-cluster-api-eks.golden new file mode 100644 index 000000000..d9b572e51 --- /dev/null +++ b/service/controller/resource/monitoring/verticalpodautoscaler/test/case-6-cluster-api-eks.golden @@ -0,0 +1,28 @@ +metadata: + creationTimestamp: null + labels: + app.kubernetes.io/instance: eks-sample + app.kubernetes.io/managed-by: prometheus-meta-operator + app.kubernetes.io/name: prometheus + giantswarm.io/cluster: eks-sample + name: prometheus + namespace: eks-sample-prometheus +spec: + resourcePolicy: + containerPolicies: + - containerName: prometheus + controlledValues: RequestsAndLimits + maxAllowed: + cpu: "4" + memory: "13743895347" + minAllowed: + cpu: 100m + memory: "1073741824" + mode: Auto + targetRef: + apiVersion: apps/v1 + kind: StatefulSet + name: prometheus-eks-sample + updatePolicy: + updateMode: Auto +status: {} diff --git a/service/controller/resource/namespace/test/case-6-cluster-api-eks.golden b/service/controller/resource/namespace/test/case-6-cluster-api-eks.golden new file mode 100644 index 000000000..e4dd548e7 --- /dev/null +++ b/service/controller/resource/namespace/test/case-6-cluster-api-eks.golden @@ -0,0 +1,12 @@ +apiVersion: v1 +kind: Namespace +metadata: + creationTimestamp: null + labels: + app.kubernetes.io/instance: eks-sample + app.kubernetes.io/managed-by: prometheus-meta-operator + app.kubernetes.io/name: prometheus + giantswarm.io/cluster: eks-sample + name: eks-sample-prometheus +spec: {} +status: {} diff --git a/service/key/key.go b/service/key/key.go index 3b1f0781d..79442c849 100644 --- a/service/key/key.go +++ b/service/key/key.go @@ -296,6 +296,13 @@ func IsManagementCluster(installation string, obj interface{}) bool { } } +func IsEKSCluster(obj interface{}) bool { + if c, ok := obj.(*capi.Cluster); ok { + return c.Spec.InfrastructureRef.Kind == "AWSManagedCluster" + } + return false +} + func ClusterType(installation string, obj interface{}) string { if IsManagementCluster(installation, obj) { return "management_cluster"