From c66e8127577682c593ea41df5f69e42cf814520d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miko=C5=82aj=20=C5=9Awi=C4=85tek?= Date: Wed, 26 Apr 2023 20:50:16 +0200 Subject: [PATCH] fix(metrics): exclude metrics collector from webhooks --- deploy/helm/sumologic/README.md | 2 +- .../metrics/collector/otelcol/config.yaml | 38 +-- .../sumologic/templates/_helpers/_metrics.tpl | 26 +- .../collector/otelcol/clusterrole.yaml | 2 +- .../collector/otelcol/clusterrolebinding.yaml | 4 +- .../otelcol/opentelemetrycollector.yaml | 2 +- .../collector/otelcol/serviceaccount.yaml | 2 +- .../otelcol/targetallocator-clusterrole.yaml | 2 +- .../targetallocator-clusterrolebinding.yaml | 2 +- .../targetallocator-serviceaccount.yaml | 2 +- deploy/helm/sumologic/values.yaml | 7 + .../metrics_collector_otc/basic.output.yaml | 227 +++++++++--------- tests/integration/features.go | 8 +- .../values/values_helm_ot_metrics.yaml | 5 - 14 files changed, 170 insertions(+), 159 deletions(-) diff --git a/deploy/helm/sumologic/README.md b/deploy/helm/sumologic/README.md index 574e35645b..0a6b4aeebd 100644 --- a/deploy/helm/sumologic/README.md +++ b/deploy/helm/sumologic/README.md @@ -416,7 +416,7 @@ The following table lists the configurable parameters of the Sumo Logic chart an | `opentelemetry-operator.instrumentationNamespaces` | Used to create `Instrumentation` resources in specified namespaces. | `Nil` | | `opentelemetry-operator.instrumentationJobImage.image.repository` | Name of the image repository used to apply Instrumentation resource | `sumologic/kubernetes-tools` | | `opentelemetry-operator.instrumentationJobImage.image.tag` | Name of the image tag used to apply Instrumentation resource | `2.14.0` | -| `opentelemetry-operator.admissionWebhooks` | Admission webhooks make sure only requests with correctly formatted rules will get into the Operator. They also enable the sidecar injection for OpenTelemetryCollector and Instrumentation CR's. 
| `{"failurePolicy": "Fail", "enabled": true, "certManager": {"enabled": false, "issuerRef": {}}}` | +| `opentelemetry-operator.admissionWebhooks` | Admission webhooks make sure only requests with correctly formatted rules will get into the Operator. They also enable the sidecar injection for OpenTelemetryCollector and Instrumentation CR's. | See [values.yaml] | | `opentelemetry-operator.manager.env` | Additional environment variables for opentelemetry-operator helm chart. | `Nil` | | `otelcolInstrumentation.enabled` | Enables Sumo OTel Distro Collector StatefulSet to collect telemetry data. | `true` | | `otelcolInstrumentation.statefulset.replicaCount` | Set the number of otelcol-instrumentation replicasets. | `3` | diff --git a/deploy/helm/sumologic/conf/metrics/collector/otelcol/config.yaml b/deploy/helm/sumologic/conf/metrics/collector/otelcol/config.yaml index 0ef256639f..4d2357e51a 100644 --- a/deploy/helm/sumologic/conf/metrics/collector/otelcol/config.yaml +++ b/deploy/helm/sumologic/conf/metrics/collector/otelcol/config.yaml @@ -1,3 +1,23 @@ +exporters: +{{ tpl (.Files.Get "conf/metrics/otelcol/exporters.yaml") . | indent 2 }} + +extensions: + health_check: {} +{{ if .Values.metadata.persistence.enabled }} + ## Configuration for File Storage extension + ## ref: https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/release/v0.37.x/extension/storage/filestorage + file_storage: + directory: /var/lib/storage/otc + timeout: 10s + compaction: + on_rebound: true + directory: /tmp +{{ end }} + pprof: {} + +processors: +{{ tpl (.Files.Get "conf/metrics/otelcol/processors.yaml") . | indent 2 }} + receivers: prometheus: config: @@ -88,23 +108,7 @@ receivers: endpoint: http://{{ template "sumologic.metadata.name.metrics.targetallocator.name" . 
}} interval: 30s collector_id: ${POD_NAME} -extensions: - health_check: {} -{{ if .Values.metadata.persistence.enabled }} - ## Configuration for File Storage extension - ## ref: https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/release/v0.37.x/extension/storage/filestorage - file_storage: - directory: /var/lib/storage/otc - timeout: 10s - compaction: - on_rebound: true - directory: /tmp -{{ end }} - pprof: {} -exporters: -{{ tpl (.Files.Get "conf/metrics/otelcol/exporters.yaml") . | indent 2 }} -processors: -{{ tpl (.Files.Get "conf/metrics/otelcol/processors.yaml") . | indent 2 }} + service: telemetry: logs: diff --git a/deploy/helm/sumologic/templates/_helpers/_metrics.tpl b/deploy/helm/sumologic/templates/_helpers/_metrics.tpl index 19afe2fd76..4122d8631f 100644 --- a/deploy/helm/sumologic/templates/_helpers/_metrics.tpl +++ b/deploy/helm/sumologic/templates/_helpers/_metrics.tpl @@ -220,28 +220,34 @@ sumologic.com/scrape: "true" Definitions for metrics collector */}} +{{- define "sumologic.labels.component.metrics" -}} +sumologic.com/component: metrics +{{- end -}} + {{- define "sumologic.labels.app.metrics.collector" -}} -{{- template "sumologic.fullname" . }}-metrics +sumologic.com/app: otelcol {{- end -}} {{- define "sumologic.labels.app.metrics.collector.pod" -}} -{{- template "sumologic.labels.app.metrics.collector" . }} +{{ template "sumologic.labels.app.metrics.collector" . }} +{{ template "sumologic.labels.component.metrics" . }} {{- end -}} -{{- define "sumologic.labels.app.metrics.clusterrole" -}} -{{- template "sumologic.labels.app.metrics.collector" . }} +{{- define "sumologic.labels.metrics.clusterrole" -}} +{{- template "sumologic.labels.component.metrics" . }} {{- end -}} -{{- define "sumologic.labels.app.metrics.clusterrolebinding" -}} -{{- template "sumologic.labels.app.metrics.collector" . }} +{{- define "sumologic.labels.metrics.clusterrolebinding" -}} +{{- template "sumologic.labels.component.metrics" . 
}} {{- end -}} -{{- define "sumologic.labels.app.metrics.serviceaccount" -}} -{{- template "sumologic.labels.app.metrics.collector" . }} +{{- define "sumologic.labels.metrics.serviceaccount" -}} +{{- template "sumologic.labels.component.metrics" . }} {{- end -}} -{{- define "sumologic.labels.app.metrics.collector.opentelemetrycollector" -}} -{{- template "sumologic.labels.app.metrics.collector" . }} +{{- define "sumologic.labels.metrics.opentelemetrycollector" -}} +{{ template "sumologic.labels.app.metrics.collector" . }} +{{ template "sumologic.labels.component.metrics" . }} {{- end -}} {{- define "sumologic.metadata.name.metrics.collector" -}} diff --git a/deploy/helm/sumologic/templates/metrics/collector/otelcol/clusterrole.yaml b/deploy/helm/sumologic/templates/metrics/collector/otelcol/clusterrole.yaml index 8d65725849..e50c924e0c 100644 --- a/deploy/helm/sumologic/templates/metrics/collector/otelcol/clusterrole.yaml +++ b/deploy/helm/sumologic/templates/metrics/collector/otelcol/clusterrole.yaml @@ -4,7 +4,7 @@ kind: ClusterRole metadata: name: {{ template "sumologic.metadata.name.metrics.collector.clusterrole" . }} labels: - app: {{ template "sumologic.labels.app.metrics.clusterrole" . }} + {{- include "sumologic.labels.metrics.clusterrole" . | nindent 4 }} {{- include "sumologic.labels.common" . | nindent 4 }} rules: - apiGroups: [""] diff --git a/deploy/helm/sumologic/templates/metrics/collector/otelcol/clusterrolebinding.yaml b/deploy/helm/sumologic/templates/metrics/collector/otelcol/clusterrolebinding.yaml index 772d6b5ef5..fedcc60e7f 100644 --- a/deploy/helm/sumologic/templates/metrics/collector/otelcol/clusterrolebinding.yaml +++ b/deploy/helm/sumologic/templates/metrics/collector/otelcol/clusterrolebinding.yaml @@ -4,7 +4,7 @@ kind: ClusterRoleBinding metadata: name: {{ template "sumologic.metadata.name.metrics.collector.clusterrolebinding.prometheus" . }} labels: - app: {{ template "sumologic.labels.app.metrics.clusterrolebinding" . 
}} + {{- include "sumologic.labels.metrics.clusterrolebinding" . | nindent 4 }} {{- include "sumologic.labels.common" . | nindent 4 }} subjects: - kind: ServiceAccount @@ -20,7 +20,7 @@ kind: ClusterRoleBinding metadata: name: {{ template "sumologic.metadata.name.metrics.collector.clusterrolebinding.metadata" . }} labels: - app: {{ template "sumologic.labels.app.metrics.clusterrolebinding" . }} + {{- include "sumologic.labels.metrics.clusterrolebinding" . | nindent 4 }} {{- include "sumologic.labels.common" . | nindent 4 }} subjects: - kind: ServiceAccount diff --git a/deploy/helm/sumologic/templates/metrics/collector/otelcol/opentelemetrycollector.yaml b/deploy/helm/sumologic/templates/metrics/collector/otelcol/opentelemetrycollector.yaml index 6cc5fc1a19..47721dbc7e 100644 --- a/deploy/helm/sumologic/templates/metrics/collector/otelcol/opentelemetrycollector.yaml +++ b/deploy/helm/sumologic/templates/metrics/collector/otelcol/opentelemetrycollector.yaml @@ -4,7 +4,7 @@ kind: OpenTelemetryCollector metadata: name: {{ template "sumologic.metadata.name.metrics.collector.opentelemetrycollector" . }} labels: - app: {{ template "sumologic.labels.app.metrics.collector" . }} + {{- include "sumologic.labels.metrics.opentelemetrycollector" . | nindent 4 }} {{- include "sumologic.labels.common" . | nindent 4 }} spec: mode: statefulset diff --git a/deploy/helm/sumologic/templates/metrics/collector/otelcol/serviceaccount.yaml b/deploy/helm/sumologic/templates/metrics/collector/otelcol/serviceaccount.yaml index ab024bd480..5f23773d0c 100644 --- a/deploy/helm/sumologic/templates/metrics/collector/otelcol/serviceaccount.yaml +++ b/deploy/helm/sumologic/templates/metrics/collector/otelcol/serviceaccount.yaml @@ -4,6 +4,6 @@ kind: ServiceAccount metadata: name: {{ template "sumologic.metadata.name.metrics.collector.serviceaccount" . }} labels: - app: {{ template "sumologic.labels.app.metrics.serviceaccount" . }} + {{- include "sumologic.labels.metrics.serviceaccount" . 
| nindent 4 }} {{- include "sumologic.labels.common" . | nindent 4 }} {{- end }} diff --git a/deploy/helm/sumologic/templates/metrics/collector/otelcol/targetallocator-clusterrole.yaml b/deploy/helm/sumologic/templates/metrics/collector/otelcol/targetallocator-clusterrole.yaml index 28850e9633..3da496b71f 100644 --- a/deploy/helm/sumologic/templates/metrics/collector/otelcol/targetallocator-clusterrole.yaml +++ b/deploy/helm/sumologic/templates/metrics/collector/otelcol/targetallocator-clusterrole.yaml @@ -4,7 +4,7 @@ kind: ClusterRole metadata: name: {{ template "sumologic.metadata.name.metrics.targetallocator.clusterrole" . }} labels: - app: {{ template "sumologic.labels.app.metrics.clusterrole" . }} + {{- include "sumologic.labels.metrics.clusterrole" . | nindent 4 }} {{- include "sumologic.labels.common" . | nindent 4 }} rules: - apiGroups: [""] diff --git a/deploy/helm/sumologic/templates/metrics/collector/otelcol/targetallocator-clusterrolebinding.yaml b/deploy/helm/sumologic/templates/metrics/collector/otelcol/targetallocator-clusterrolebinding.yaml index 3754659b5a..f30ec905a3 100644 --- a/deploy/helm/sumologic/templates/metrics/collector/otelcol/targetallocator-clusterrolebinding.yaml +++ b/deploy/helm/sumologic/templates/metrics/collector/otelcol/targetallocator-clusterrolebinding.yaml @@ -4,7 +4,7 @@ kind: ClusterRoleBinding metadata: name: {{ template "sumologic.metadata.name.metrics.targetallocator.clusterrolebinding" . }} labels: - app: {{ template "sumologic.labels.app.metrics.clusterrolebinding" . }} + {{- include "sumologic.labels.metrics.clusterrolebinding" . | nindent 4 }} {{- include "sumologic.labels.common" . 
| nindent 4 }} subjects: - kind: ServiceAccount diff --git a/deploy/helm/sumologic/templates/metrics/collector/otelcol/targetallocator-serviceaccount.yaml b/deploy/helm/sumologic/templates/metrics/collector/otelcol/targetallocator-serviceaccount.yaml index c4cb6a6e90..6dbe96e7cf 100644 --- a/deploy/helm/sumologic/templates/metrics/collector/otelcol/targetallocator-serviceaccount.yaml +++ b/deploy/helm/sumologic/templates/metrics/collector/otelcol/targetallocator-serviceaccount.yaml @@ -4,6 +4,6 @@ kind: ServiceAccount metadata: name: {{ template "sumologic.metadata.name.metrics.targetallocator.serviceaccount" . }} labels: - app: {{ template "sumologic.labels.app.metrics.serviceaccount" . }} + {{- include "sumologic.labels.metrics.serviceaccount" . | nindent 4 }} {{- include "sumologic.labels.common" . | nindent 4 }} {{- end }} diff --git a/deploy/helm/sumologic/values.yaml b/deploy/helm/sumologic/values.yaml index dea5c5298d..d98f444d8c 100644 --- a/deploy/helm/sumologic/values.yaml +++ b/deploy/helm/sumologic/values.yaml @@ -4634,6 +4634,13 @@ opentelemetry-operator: failurePolicy: Fail enabled: true + ## Skip the admission webhook for our own OpenTelemetryCollector object so that we do not have to wait for the operator to start + objectSelector: + matchExpressions: + - key: sumologic.com/component + operator: NotIn + values: ["metrics"] + certManager: enabled: false issuerRef: {} diff --git a/tests/helm/testdata/goldenfile/metrics_collector_otc/basic.output.yaml b/tests/helm/testdata/goldenfile/metrics_collector_otc/basic.output.yaml index 760af3d96d..4dbc5e9ea2 100644 --- a/tests/helm/testdata/goldenfile/metrics_collector_otc/basic.output.yaml +++ b/tests/helm/testdata/goldenfile/metrics_collector_otc/basic.output.yaml @@ -5,7 +5,8 @@ kind: OpenTelemetryCollector metadata: name: RELEASE-NAME-sumologic-metrics labels: - app: RELEASE-NAME-sumologic-metrics + sumologic.com/app: otelcol + sumologic.com/component: metrics chart: "sumologic-%CURRENT_CHART_VERSION%" release: 
"RELEASE-NAME" heritage: "Helm" @@ -90,109 +91,6 @@ spec: requests: storage: 10Gi config: | - receivers: - prometheus: - config: - global: - scrape_interval: 30s - scrape_configs: - ## These scrape configs are for kubelet metrics - ## Prometheus operator does this by manually maintaining a Service with Endpoints for all Nodes - ## We don't have that capability, so we need to use a static configuration - - job_name: kubelet - scheme: https - authorization: - credentials_file: /var/run/secrets/kubernetes.io/serviceaccount/token - tls_config: - ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt - insecure_skip_verify: true - honor_labels: true - kubernetes_sd_configs: - - role: node - metric_relabel_configs: - - action: keep - regex: (?:kubelet_docker_operations_errors(?:|_total)|kubelet_(?:docker|runtime)_operations_duration_seconds_(?:count|sum)|kubelet_running_(?:container|pod)(?:_count|s)|kubelet_(:?docker|runtime)_operations_latency_microseconds(?:|_count|_sum)) - source_labels: [__name__] - # TODO: The below can't be used due to https://github.com/open-telemetry/opentelemetry-operator/issues/958 - # - action: labeldrop - # regex: id - relabel_configs: &relabel_configs # partially copied from what operator generates - - source_labels: - - __meta_kubernetes_node_name - target_label: node - - source_labels: - - __meta_kubernetes_namespace - target_label: namespace - - source_labels: - - __meta_kubernetes_pod_name - target_label: pod - - source_labels: - - __meta_kubernetes_pod_container_name - target_label: container - - target_label: endpoint - replacement: https-metrics - - source_labels: - - __metrics_path__ - target_label: metrics_path - action: replace - - source_labels: - - __address__ - target_label: instance - action: replace - - job_name: cadvisor - scheme: https - authorization: - credentials_file: /var/run/secrets/kubernetes.io/serviceaccount/token - tls_config: - ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt - 
insecure_skip_verify: true - honor_labels: true - metrics_path: /metrics/cadvisor - kubernetes_sd_configs: - - role: node - metric_relabel_configs: - - action: replace - regex: .* - replacement: kubelet - source_labels: [__name__] - target_label: job - - action: keep - regex: (?:container_cpu_usage_seconds_total|container_memory_working_set_bytes|container_fs_usage_bytes|container_fs_limit_bytes|container_cpu_cfs_throttled_seconds_total|container_network_receive_bytes_total|container_network_transmit_bytes_total) - source_labels: [__name__] - ## Drop container metrics with container tag set to an empty string: - ## these are the pod aggregated container metrics which can be aggregated - ## in Sumo anyway. There's also some cgroup-specific time series we also - ## do not need. - - action: drop - source_labels: [__name__, container] - regex: (?:container_cpu_usage_seconds_total|container_memory_working_set_bytes|container_fs_usage_bytes|container_fs_limit_bytes);$ - - action: labelmap - regex: container_name - replacement: container - - action: drop - source_labels: [container] - regex: POD - # TODO: The below can't be used due to https://github.com/open-telemetry/opentelemetry-operator/issues/958 - # - action: labeldrop - # regex: (id|name) - relabel_configs: *relabel_configs # partially copied from what operator generates - target_allocator: - endpoint: http://RELEASE-NAME-sumologic-metrics-targetallocator - interval: 30s - collector_id: ${POD_NAME} - extensions: - health_check: {} - - ## Configuration for File Storage extension - ## ref: https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/release/v0.37.x/extension/storage/filestorage - file_storage: - directory: /var/lib/storage/otc - timeout: 10s - compaction: - on_rebound: true - directory: /tmp - - pprof: {} exporters: ## ref: https://github.com/SumoLogic/sumologic-otel-collector/tree/main/pkg/exporter/sumologicexporter sumologic/apiserver: @@ -300,10 +198,25 @@ spec: ## set timeout to 30s 
due to big requests timeout: 30s + + extensions: + health_check: {} + + ## Configuration for File Storage extension + ## ref: https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/release/v0.37.x/extension/storage/filestorage + file_storage: + directory: /var/lib/storage/otc + timeout: 10s + compaction: + on_rebound: true + directory: /tmp + + pprof: {} + processors: ## Configuration for Batch Processor ## The batch processor accepts spans and places them into batches grouped by node and resource - ## ref: https://github.com/open-telemetry/opentelemetry-collector/tree/v0.73.0/processor/batchprocessor + ## ref: https://github.com/open-telemetry/opentelemetry-collector/tree/v0.76.1/processor/batchprocessor batch: ## Maximum number of spans sent at once send_batch_max_size: 2_048 @@ -344,7 +257,7 @@ spec: ## Configuration for Memory Limiter Processor ## The memory_limiter processor is used to prevent out of memory situations on the collector. - ## ref: https://github.com/open-telemetry/opentelemetry-collector/tree/v0.73.0/processor/memorylimiter + ## ref: https://github.com/open-telemetry/opentelemetry-collector/tree/v0.76.1/processor/memorylimiter memory_limiter: ## check_interval is the time between measurements of memory usage for the ## purposes of avoiding going over the limits. 
Defaults to zero, so no @@ -357,7 +270,7 @@ spec: spike_limit_percentage: 20 ## Configuration for Metrics Transform Processor - ## ref: https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/v0.73.0/processor/metricstransformprocessor + ## ref: https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/v0.76.1/processor/metricstransformprocessor metricstransform: transforms: ## rename all prometheus_remote_write_$name metrics to $name @@ -367,7 +280,7 @@ spec: new_name: $$1 ## Configuration for Resource Processor - ## ref: https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/v0.73.0/processor/resourceprocessor + ## ref: https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/v0.76.1/processor/resourceprocessor resource: attributes: - action: upsert @@ -406,7 +319,7 @@ spec: ## are exposed and source processor is configured then send them ## as headers. ## ref: https://github.com/SumoLogic/sumologic-otel-collector/issues/265 - ## ref: https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/v0.73.0/processor/resourceprocessor + ## ref: https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/v0.76.1/processor/resourceprocessor resource/delete_source_metadata: attributes: - action: delete @@ -416,7 +329,7 @@ spec: - action: delete key: _sourceName - ## ref: https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/v0.73.0/processor/resourceprocessor + ## ref: https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/v0.76.1/processor/resourceprocessor resource/remove_k8s_pod_pod_name: attributes: - action: delete @@ -425,7 +338,7 @@ spec: ## NOTE: below listed rules could be simplified if routingprocessor ## supports regex matching. At this point we could group route entries ## going to the same set of exporters. 
- ## ref: https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/v0.73.0/processor/routingprocessor + ## ref: https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/v0.76.1/processor/routingprocessor routing: attribute_source: resource default_exporters: @@ -481,6 +394,98 @@ spec: sumologic_schema: add_cloud_namespace: false + + receivers: + prometheus: + config: + global: + scrape_interval: 30s + scrape_configs: + ## These scrape configs are for kubelet metrics + ## Prometheus operator does this by manually maintaining a Service with Endpoints for all Nodes + ## We don't have that capability, so we need to use a static configuration + - job_name: kubelet + scheme: https + authorization: + credentials_file: /var/run/secrets/kubernetes.io/serviceaccount/token + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + insecure_skip_verify: true + honor_labels: true + kubernetes_sd_configs: + - role: node + metric_relabel_configs: + - action: keep + regex: (?:kubelet_docker_operations_errors(?:|_total)|kubelet_(?:docker|runtime)_operations_duration_seconds_(?:count|sum)|kubelet_running_(?:container|pod)(?:_count|s)|kubelet_(:?docker|runtime)_operations_latency_microseconds(?:|_count|_sum)) + source_labels: [__name__] + # TODO: The below can't be used due to https://github.com/open-telemetry/opentelemetry-operator/issues/958 + # - action: labeldrop + # regex: id + relabel_configs: &relabel_configs # partially copied from what operator generates + - source_labels: + - __meta_kubernetes_node_name + target_label: node + - source_labels: + - __meta_kubernetes_namespace + target_label: namespace + - source_labels: + - __meta_kubernetes_pod_name + target_label: pod + - source_labels: + - __meta_kubernetes_pod_container_name + target_label: container + - target_label: endpoint + replacement: https-metrics + - source_labels: + - __metrics_path__ + target_label: metrics_path + action: replace + - source_labels: + - 
__address__ + target_label: instance + action: replace + - job_name: cadvisor + scheme: https + authorization: + credentials_file: /var/run/secrets/kubernetes.io/serviceaccount/token + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + insecure_skip_verify: true + honor_labels: true + metrics_path: /metrics/cadvisor + kubernetes_sd_configs: + - role: node + metric_relabel_configs: + - action: replace + regex: .* + replacement: kubelet + source_labels: [__name__] + target_label: job + - action: keep + regex: (?:container_cpu_usage_seconds_total|container_memory_working_set_bytes|container_fs_usage_bytes|container_fs_limit_bytes|container_cpu_cfs_throttled_seconds_total|container_network_receive_bytes_total|container_network_transmit_bytes_total) + source_labels: [__name__] + ## Drop container metrics with container tag set to an empty string: + ## these are the pod aggregated container metrics which can be aggregated + ## in Sumo anyway. There's also some cgroup-specific time series we also + ## do not need. 
+ - action: drop + source_labels: [__name__, container] + regex: (?:container_cpu_usage_seconds_total|container_memory_working_set_bytes|container_fs_usage_bytes|container_fs_limit_bytes);$ + - action: labelmap + regex: container_name + replacement: container + - action: drop + source_labels: [container] + regex: POD + # TODO: The below can't be used due to https://github.com/open-telemetry/opentelemetry-operator/issues/958 + # - action: labeldrop + # regex: (id|name) + relabel_configs: *relabel_configs # partially copied from what operator generates + target_allocator: + endpoint: http://RELEASE-NAME-sumologic-metrics-targetallocator + interval: 30s + collector_id: ${POD_NAME} + service: telemetry: logs: diff --git a/tests/integration/features.go b/tests/integration/features.go index 5ab408dc83..d28131cf4b 100644 --- a/tests/integration/features.go +++ b/tests/integration/features.go @@ -537,17 +537,11 @@ func CheckOtelcolMetricsCollectorInstall(builder *features.FeatureBuilder) *feat return builder. Assess("otelcol metrics collector statefulset is ready", stepfuncs.WaitUntilStatefulSetIsReady( - waitDuration, + waitDuration*2, tickDuration, stepfuncs.WithNameF( stepfuncs.ReleaseFormatter("%s-sumologic-metrics-collector"), ), - stepfuncs.WithLabelsF( - stepfuncs.LabelFormatterKV{ - K: "app", - V: stepfuncs.ReleaseFormatter("%s-sumologic-metrics"), - }, - ), ), ). Assess("otelcol metrics collector buffers PVCs are created and bound", diff --git a/tests/integration/values/values_helm_ot_metrics.yaml b/tests/integration/values/values_helm_ot_metrics.yaml index b9d264fed0..8c60bacf76 100644 --- a/tests/integration/values/values_helm_ot_metrics.yaml +++ b/tests/integration/values/values_helm_ot_metrics.yaml @@ -25,8 +25,3 @@ kube-prometheus-stack: opentelemetry-operator: enabled: true - admissionWebhooks: - create: false - manager: - env: - ENABLE_WEBHOOKS: "false"