From 9ef1d384fdca282c8dc5e780d59700c99dc7d938 Mon Sep 17 00:00:00 2001 From: Ryan Fitzpatrick Date: Tue, 25 Jan 2022 15:55:23 +0000 Subject: [PATCH 01/14] add eks/fargate StatefulSet distribution --- .gitignore | 2 + Makefile | 12 ++ README.md | 1 + docs/advanced-configuration.md | 33 ++- .../scripts/eks-fargate-otelcol-with-env.sh | 5 + .../lookup-eks-fargate-receiver-node.sh | 51 +++++ .../templates/_helpers.tpl | 30 +++ .../templates/clusterRole.yaml | 8 + .../templates/config/_common.tpl | 2 +- .../_otel-k8s-cluster-receiver-config.tpl | 87 +++++++- .../templates/configmap-agent.yaml | 5 +- ...uster-receiver-node-discoverer-script.yaml | 16 ++ ...onfigmap-eks-fargate-cluster-receiver.yaml | 16 ++ .../templates/configmap-gateway.yaml | 2 +- .../templates/daemonset.yaml | 5 +- .../deployment-cluster-receiver.yaml | 51 ++++- ...service-cluster-receiver-stateful-set.yaml | 14 ++ .../splunk-otel-collector/values.schema.json | 1 + helm-charts/splunk-otel-collector/values.yaml | 3 +- .../configmap-cluster-receiver.yaml | 9 +- .../deployment-cluster-receiver.yaml | 2 +- .../manifests/eks-fargate/clusterRole.yaml | 89 ++++++++ .../eks-fargate/clusterRoleBinding.yaml | 24 +++ ...uster-receiver-node-discoverer-script.yaml | 69 +++++++ .../configmap-cluster-receiver.yaml | 140 +++++++++++++ ...onfigmap-eks-fargate-cluster-receiver.yaml | 23 +++ .../eks-fargate/configmap-gateway.yaml | 192 ++++++++++++++++++ .../deployment-cluster-receiver.yaml | 143 +++++++++++++ .../eks-fargate/deployment-gateway.yaml | 121 +++++++++++ rendered/manifests/eks-fargate/secret.yaml | 19 ++ ...service-cluster-receiver-stateful-set.yaml | 13 ++ rendered/manifests/eks-fargate/service.yaml | 57 ++++++ .../manifests/eks-fargate/serviceAccount.yaml | 16 ++ .../configmap-cluster-receiver.yaml | 9 +- .../deployment-cluster-receiver.yaml | 2 +- .../otel-logs/configmap-cluster-receiver.yaml | 9 +- .../deployment-cluster-receiver.yaml | 2 +- 37 files changed, 1257 insertions(+), 26 deletions(-) create mode 100644 .gitignore create mode 100644 helm-charts/splunk-otel-collector/scripts/eks-fargate-otelcol-with-env.sh create mode 100644 helm-charts/splunk-otel-collector/scripts/lookup-eks-fargate-receiver-node.sh create mode 100644 helm-charts/splunk-otel-collector/templates/configmap-cluster-receiver-node-discoverer-script.yaml create mode 100644 helm-charts/splunk-otel-collector/templates/configmap-eks-fargate-cluster-receiver.yaml create mode 100644 helm-charts/splunk-otel-collector/templates/service-cluster-receiver-stateful-set.yaml create mode 100644 rendered/manifests/eks-fargate/clusterRole.yaml create mode 100644 rendered/manifests/eks-fargate/clusterRoleBinding.yaml create mode 100644 rendered/manifests/eks-fargate/configmap-cluster-receiver-node-discoverer-script.yaml create mode 100644 rendered/manifests/eks-fargate/configmap-cluster-receiver.yaml create mode 100644 rendered/manifests/eks-fargate/configmap-eks-fargate-cluster-receiver.yaml create mode 100644 rendered/manifests/eks-fargate/configmap-gateway.yaml create mode 100644 rendered/manifests/eks-fargate/deployment-cluster-receiver.yaml create mode 100644 rendered/manifests/eks-fargate/deployment-gateway.yaml create mode 100644 rendered/manifests/eks-fargate/secret.yaml create mode 100644 rendered/manifests/eks-fargate/service-cluster-receiver-stateful-set.yaml create mode 100644 rendered/manifests/eks-fargate/service.yaml create mode 100644 rendered/manifests/eks-fargate/serviceAccount.yaml diff --git a/.gitignore b/.gitignore new file mode 100644 index 
0000000000..c38fa4e005 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +.idea +*.iml diff --git a/Makefile b/Makefile index 7444bf73a7..78e2d9a55f 100644 --- a/Makefile +++ b/Makefile @@ -49,3 +49,15 @@ render: default helm-charts/splunk-otel-collector; \ mv "$$dir"/splunk-otel-collector/templates/* "$$dir"; \ rm -rf "$$dir"/splunk-otel-collector + + # eks/fargate deployment (with recommended gateway) + dir=rendered/manifests/eks-fargate; \ + mkdir -p "$$dir"; \ + helm template \ + --namespace default \ + --values rendered/values.yaml \ + --output-dir "$$dir" \ + --set distribution=eks/fargate,gateway.enabled=true,cloudProvider=aws \ + default helm-charts/splunk-otel-collector; \ + mv "$$dir"/splunk-otel-collector/templates/* "$$dir"; \ + rm -rf "$$dir"/splunk-otel-collector
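The added block extends the repo's existing `render` target, so the committed manifests under `rendered/manifests/eks-fargate` can be regenerated after any chart change with (assuming `helm` and `make` are available locally):

```bash
make render
```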
diff --git a/README.md b/README.md index de3b52460f..e36dbf61b3 100644 --- a/README.md +++ b/README.md @@ -92,6 +92,7 @@ Kubernetes distributions: - [Vanilla (unmodified version) Kubernetes](https://kubernetes.io) - [Amazon Elastic Kubernetes Service](https://aws.amazon.com/eks) + including [EKS with Fargate profiles](docs/advanced-configuration.md#eks-fargate-support) - [Azure Kubernetes Service](https://docs.microsoft.com/en-us/azure/aks) - [Google Kubernetes Engine](https://cloud.google.com/kubernetes-engine) including [GKE Autopilot](docs/advanced-configuration.md#gke-autopilot-support) diff --git a/docs/advanced-configuration.md b/docs/advanced-configuration.md index ea8d1b60c8..091121e905 100644 --- a/docs/advanced-configuration.md +++ b/docs/advanced-configuration.md @@ -43,10 +43,11 @@ Use the `distribution` parameter to provide information about the underlying Kubernetes deployment. This parameter allows the collector to automatically scrape additional metadata. The supported options are: +- `aks` - Azure AKS - `eks` - Amazon EKS +- `eks/fargate` - Amazon EKS with Fargate profiles - `gke` - Google GKE / Standard mode - `gke/autopilot` - Google GKE / Autopilot mode -- `aks` - Azure AKS - `openshift` - Red Hat OpenShift This value can be omitted if none of the values apply. @@ -157,6 +158,36 @@ the following line to your custom values.yaml: priorityClassName: splunk-otel-agent-priority ``` +## EKS Fargate support + +If you want to run the Splunk OpenTelemetry Collector in [Amazon Elastic Kubernetes Service +with Fargate profiles](https://docs.aws.amazon.com/eks/latest/userguide/fargate.html), +make sure to set the required `distribution` value to `eks/fargate`: + +```yaml +distribution: eks/fargate +``` + +**NOTE:** Fluentd and native OTel logs collection are not yet automatically configured in EKS with Fargate profiles. + +This distribution will operate similarly to the `eks` distribution but with the following distinctions: + +1. The Collector agent daemonset is not applied since Fargate doesn't support daemonsets. Any desired Collector instances +running as agents must be configured manually as sidecar containers in your custom deployments. This includes any application +logging services like Fluentd. If no agent instances are used in your cluster, we recommend setting `gateway.enabled` to `true` +and configuring your instrumented applications to report metrics, traces, and logs to the gateway's +`<release-name>-splunk-otel-collector` service address (see the install sketch after this list). 
2. The Collector's ClusterRole for `eks/fargate` will allow the `patch` verb on `nodes` resources for the core API group. This is to allow
the Cluster Receiver's init container to add a `splunk-otel-is-eks-fargate-cluster-receiver-node` node label for self-monitoring. This label is currently
required for reporting kubelet and pod metrics for the cluster receiver StatefulSet described below.
3. The configured Cluster Receiver is deployed as a 2-replica StatefulSet and uses a +[Kubernetes Observer extension](https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/main/extension/observer/k8sobserver/README.md) +that discovers the cluster's nodes and pods. It uses this to dynamically create +[Kubelet Stats receiver](https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/main/receiver/kubeletstatsreceiver/README.md) +instances that report kubelet metrics for all observed Fargate nodes, distributed across the replicas. The second replica applies the Fargate-specific +node label mentioned above to its own node on startup and monitors all Fargate kubelets except its own, since an EKS/Fargate networking restriction +prevents a pod from reaching the kubelet of the node it runs on. The first replica runs the `k8s_cluster` receiver and closes the remaining gap by +monitoring the second replica's kubelet, which it discovers through that node label.
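+
+Putting these settings together, a minimal install of this distribution with the recommended gateway could look like the
+following sketch (the release name `my-splunk-otel-collector` and the `my-values.yaml` file holding your other required
+settings, such as the access token, realm, and cluster name, are placeholders; the `--set` flags mirror the ones this
+repo's Makefile uses to render the `eks-fargate` manifests):
+
+```bash
+helm install my-splunk-otel-collector helm-charts/splunk-otel-collector \
+  --values my-values.yaml \
+  --set distribution=eks/fargate,gateway.enabled=true,cloudProvider=aws
+```
+
+Once the second cluster receiver replica has started, the node label it applies can be verified with
+`kubectl get nodes -l splunk-otel-is-eks-fargate-cluster-receiver-node=true`.
+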
## Logs collection The helm chart currently utilizes [fluentd](https://docs.fluentd.org/) for Kubernetes logs diff --git a/helm-charts/splunk-otel-collector/scripts/eks-fargate-otelcol-with-env.sh b/helm-charts/splunk-otel-collector/scripts/eks-fargate-otelcol-with-env.sh new file mode 100644 index 0000000000..2b5d01afb2 --- /dev/null +++ b/helm-charts/splunk-otel-collector/scripts/eks-fargate-otelcol-with-env.sh @@ -0,0 +1,5 @@ +set -ex +if [ -f /splunk-messages/environ ]; then + . /splunk-messages/environ +fi +/otelcol "$@" diff --git a/helm-charts/splunk-otel-collector/scripts/lookup-eks-fargate-receiver-node.sh b/helm-charts/splunk-otel-collector/scripts/lookup-eks-fargate-receiver-node.sh new file mode 100644 index 0000000000..38c79c5de1 --- /dev/null +++ b/helm-charts/splunk-otel-collector/scripts/lookup-eks-fargate-receiver-node.sh @@ -0,0 +1,51 @@ +#! /usr/bin/bash +set -ex + +# If we are the first pod (cluster receiver), set the kubelet stats node filter to only follow labelled nodes. +# This node label will be set by the second pod. +if [[ "${K8S_POD_NAME}" == *-0 ]]; then + echo "will configure kubelet stats receiver to follow the labelled node, as well as use the cluster receiver." + echo "export CR_KUBELET_STATS_NODE_FILTER='&& labels[\"splunk-otel-is-eks-fargate-cluster-receiver-node\"] == \"true\"'" >/splunk-messages/environ + echo "export CR_K8S_OBSERVER_OBSERVE_PODS='false'" >>/splunk-messages/environ + + cat /splunk-messages/environ + + # copy config to meet container command args + cp /conf/relay.yaml /splunk-messages/config.yaml + exit 0 +fi + +# Else we are the second pod (wide kubelet stats): label our node to be monitored by the first pod and disable the k8s_cluster receiver. +# Update our config to not monitor ourselves +echo "Labelling our fargate node to denote it hosts the cluster receiver" + +# download kubectl (verifying checksum) +curl -o kubectl https://amazon-eks.s3.us-west-2.amazonaws.com/1.16.15/2020-11-02/bin/linux/amd64/kubectl +curl -o kubectl.sha256 https://amazon-eks.s3.us-west-2.amazonaws.com/1.16.15/2020-11-02/bin/linux/amd64/kubectl.sha256 +ACTUAL=$(sha256sum kubectl | awk '{print $1}') +EXPECTED=$(cat kubectl.sha256 | awk '{print $1}') +if [ "${ACTUAL}" != "${EXPECTED}" ]; then + echo "will not attempt to use kubectl with unexpected sha256 (${ACTUAL} != ${EXPECTED})" + exit 1 +fi +chmod a+x kubectl +# label node +./kubectl label nodes $K8S_NODE_NAME splunk-otel-is-eks-fargate-cluster-receiver-node=true + +echo "Disabling k8s_cluster receiver for this instance" +# download yq to strip k8s_cluster receiver +curl -L -o yq https://github.com/mikefarah/yq/releases/download/v4.16.2/yq_linux_amd64 +ACTUAL=$(sha256sum yq | awk '{print $1}') +if [ "${ACTUAL}" != "5c911c4da418ae64af5527b7ee36e77effb85de20c2ce732ed14c7f72743084d" ]; then + echo "will not attempt to use yq with unexpected sha256 (${ACTUAL} != 5c911c4da418ae64af5527b7ee36e77effb85de20c2ce732ed14c7f72743084d)" + exit 1 +fi +chmod a+x yq +# strip k8s_cluster and its pipeline +./yq e 'del(.service.pipelines.metrics)' /conf/relay.yaml >/splunk-messages/config.yaml +./yq e -i 'del(.receivers.k8s_cluster)' /splunk-messages/config.yaml + +# set kubelet stats to not monitor ourselves (all other kubelets) +echo "EKS kubelet stats receiver node lookup not applicable for $K8S_POD_NAME. Ensuring it won't monitor itself to avoid Fargate network limitation." +echo "export CR_KUBELET_STATS_NODE_FILTER='&& not ( name contains \"${K8S_NODE_NAME}\" )'" >/splunk-messages/environ +echo "export CR_K8S_OBSERVER_OBSERVE_PODS='true'" >>/splunk-messages/environ
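The two scripts above form a simple handoff: the node-discoverer init container writes a shell-sourceable `/splunk-messages/environ` plus a final `/splunk-messages/config.yaml`, and `eks-fargate-otelcol-with-env.sh` sources the former before launching `/otelcol`. Depending on the replica, `/splunk-messages/environ` ends up containing one of the following (restated from the exports above; `<node-name>` stands in for the second replica's own node name):

```bash
# first replica (pod name ending in -0): follow only the labelled node, keep the cluster receiver
export CR_KUBELET_STATS_NODE_FILTER='&& labels["splunk-otel-is-eks-fargate-cluster-receiver-node"] == "true"'
export CR_K8S_OBSERVER_OBSERVE_PODS='false'

# second replica: observe pods too, and skip our own node's kubelet
export CR_KUBELET_STATS_NODE_FILTER='&& not ( name contains "<node-name>" )'
export CR_K8S_OBSERVER_OBSERVE_PODS='true'
```

These two variables are then expanded into the receiver_creator rule and k8s_observer settings of the cluster receiver config defined below.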
diff --git a/helm-charts/splunk-otel-collector/templates/_helpers.tpl b/helm-charts/splunk-otel-collector/templates/_helpers.tpl index d1225374da..a3e6c80a8f 100644 --- a/helm-charts/splunk-otel-collector/templates/_helpers.tpl +++ b/helm-charts/splunk-otel-collector/templates/_helpers.tpl @@ -308,3 +308,33 @@ compatibility with the old config group name: "otelK8sClusterReceiver". {{- deepCopy .Values.otelK8sClusterReceiver | mustMergeOverwrite (deepCopy .Values.clusterReceiver) | toYaml }} {{- end }} {{- end -}} + +{{/* +"clusterReceiverServiceName" for the eks/fargate cluster receiver statefulSet +*/}} +{{- define "splunk-otel-collector.clusterReceiverServiceName" -}} +{{ printf "%s-k8s-cluster-receiver" ( include "splunk-otel-collector.fullname" . ) | trunc 63 | trimSuffix "-" }} +{{- end -}} + +{{/* +"clusterReceiverNodeDiscovererScript" for the eks/fargate cluster receiver statefulSet initContainer +*/}} +{{- define "splunk-otel-collector.clusterReceiverNodeDiscovererScript" -}} +{{ printf "%s-cr-node-discoverer-script" ( include "splunk-otel-collector.fullname" . ) | trunc 63 | trimSuffix "-" }} +{{- end -}} + +{{/* +"eksFargateClusterReceiverScript" for the eks/fargate cluster receiver statefulSet run command +*/}} +{{- define "splunk-otel-collector.eksFargateClusterReceiverScript" -}} +{{ printf "%s-fargate-cr-script" ( include "splunk-otel-collector.fullname" . ) | trunc 63 | trimSuffix "-" }} +{{- end -}} + +{{/* +"clusterReceiverNodeDiscovererInitContainerEnabled" is true when clusterReceiver.enabled and o11yMetricsEnabled are both true and the distribution is eks/fargate +*/}} +{{- define "splunk-otel-collector.clusterReceiverNodeDiscovererInitContainerEnabled" -}} +{{- $clusterReceiver := fromYaml (include "splunk-otel-collector.clusterReceiver" .) }} +{{- $o11yMetricsEnabled := (include "splunk-otel-collector.o11yMetricsEnabled" .) }} +{{- and (eq (toString $clusterReceiver.enabled) "true") (eq (toString $o11yMetricsEnabled) "true") (eq (include "splunk-otel-collector.distribution" .) "eks/fargate") -}} +{{- end -}} diff --git a/helm-charts/splunk-otel-collector/templates/clusterRole.yaml b/helm-charts/splunk-otel-collector/templates/clusterRole.yaml index d22d68aa46..ebb9d8ba2e 100644 --- a/helm-charts/splunk-otel-collector/templates/clusterRole.yaml +++ b/helm-charts/splunk-otel-collector/templates/clusterRole.yaml @@ -86,6 +86,14 @@ rules: - get - list - watch +{{- if eq (include "splunk-otel-collector.clusterReceiverNodeDiscovererInitContainerEnabled" .) "true" }} +- apiGroups: + - "" + resources: + - nodes + verbs: + - patch +{{- end }} {{- with .Values.rbac.customRules }} {{ toYaml . }} {{- end }} diff --git a/helm-charts/splunk-otel-collector/templates/config/_common.tpl b/helm-charts/splunk-otel-collector/templates/config/_common.tpl index 79557d5d1c..daa1207df3 100644 --- a/helm-charts/splunk-otel-collector/templates/config/_common.tpl +++ b/helm-charts/splunk-otel-collector/templates/config/_common.tpl @@ -69,7 +69,7 @@ resourcedetection: - env {{- if hasPrefix "gke" (include "splunk-otel-collector.distribution" .) }} - gke - {{- else if eq (include "splunk-otel-collector.distribution" .) "eks" }} + {{- else if hasPrefix "eks" (include "splunk-otel-collector.distribution" .) }} - eks {{- else if eq (include "splunk-otel-collector.distribution" .) "aks" }} - aks diff --git a/helm-charts/splunk-otel-collector/templates/config/_otel-k8s-cluster-receiver-config.tpl b/helm-charts/splunk-otel-collector/templates/config/_otel-k8s-cluster-receiver-config.tpl index dc34a23ccd..21325d76b9 100644 --- a/helm-charts/splunk-otel-collector/templates/config/_otel-k8s-cluster-receiver-config.tpl +++ b/helm-charts/splunk-otel-collector/templates/config/_otel-k8s-cluster-receiver-config.tpl @@ -11,6 +11,14 @@ extensions: memory_ballast: size_mib: ${SPLUNK_BALLAST_SIZE_MIB} + {{- if eq (include "splunk-otel-collector.distribution" .) "eks/fargate" }} + # k8s_observer w/ pod and node detection for eks/fargate deployment + k8s_observer: + auth_type: serviceAccount + observe_pods: ${CR_K8S_OBSERVER_OBSERVE_PODS} + observe_nodes: true + {{- end }} + receivers: # Prometheus receiver scraping metrics from the pod itself, both otel and fluentd prometheus/k8s_cluster_receiver: @@ -42,6 +50,26 @@ receivers: - reason: FailedCreate involvedObjectKind: Job {{- end }} + {{- if eq (include "splunk-otel-collector.distribution" .) "eks/fargate" }} + # dynamically created kubeletstats receivers to report all Fargate "node" kubelet stats, + # with the exception of the collector "node's" own, since Fargate forbids that connection.
+ receiver_creator: + receivers: + kubeletstats: + rule: type == "k8s.node" && name contains "fargate" ${CR_KUBELET_STATS_NODE_FILTER} + config: + auth_type: serviceAccount + collection_interval: 10s + endpoint: "`endpoint`:`kubelet_endpoint_port`" + extra_metadata_labels: + - container.id + metric_groups: + - container + - pod + - node + watch_observers: + - k8s_observer + {{- end }} processors: {{- include "splunk-otel-collector.otelMemoryLimiterConfig" . | nindent 2 }} @@ -80,12 +108,6 @@ processors: - action: insert key: metric_source value: kubernetes - # XXX: Added so that Smart Agent metrics and OTel metrics don't map to the same MTS identity - # (same metric and dimension names and values) after mappings are applied. This would be - # the case if somebody uses the same cluster name from Smart Agent and OTel in the same org. - - action: insert - key: receiver - value: k8scluster - action: upsert key: k8s.cluster.name value: {{ .Values.clusterName }} @@ -95,6 +117,15 @@ processors: value: {{ .value }} {{- end }} + resource/k8s_cluster: + attributes: + # XXX: Added so that Smart Agent metrics and OTel metrics don't map to the same MTS identity + # (same metric and dimension names and values) after mappings are applied. This would be + # the case if somebody uses the same cluster name from Smart Agent and OTel in the same org. + - action: insert + key: receiver + value: k8scluster + exporters: {{- if eq (include "splunk-otel-collector.o11yMetricsEnabled" $) "true" }} signalfx: @@ -125,11 +156,27 @@ service: telemetry: metrics: address: 0.0.0.0:8889 + {{- if eq (include "splunk-otel-collector.distribution" .) "eks/fargate" }} + extensions: [health_check, memory_ballast, k8s_observer] + {{- else }} extensions: [health_check, memory_ballast] + {{- end }} pipelines: # k8s metrics pipeline metrics: receivers: [k8s_cluster] + processors: [memory_limiter, batch, resource, resource/k8s_cluster] + exporters: + {{- if (eq (include "splunk-otel-collector.o11yMetricsEnabled" .) "true") }} + - signalfx + {{- end }} + {{- if (eq (include "splunk-otel-collector.platformMetricsEnabled" $) "true") }} + - splunk_hec/platform_metrics + {{- end }} + + {{- if eq (include "splunk-otel-collector.distribution" .) "eks/fargate" }} + metrics/eks: + receivers: [receiver_creator] processors: [memory_limiter, batch, resource] exporters: {{- if (eq (include "splunk-otel-collector.o11yMetricsEnabled" .) "true") }} @@ -138,6 +185,7 @@ service: {{- if (eq (include "splunk-otel-collector.platformMetricsEnabled" $) "true") }} - splunk_hec/platform_metrics {{- end }} + {{- end }} {{- if or (eq (include "splunk-otel-collector.splunkO11yEnabled" $) "true") (eq (include "splunk-otel-collector.platformMetricsEnabled" $) "true") }} # Pipeline for metrics collected about the collector pod itself. @@ -174,3 +222,30 @@ service: {{- end }} {{- end }} {{- end }} + +{{- define "splunk-otel-collector.clusterReceiverInitContainers" -}} +{{- if eq (include "splunk-otel-collector.distribution" .) "eks/fargate" }} +- name: cluster-receiver-node-discoverer + image: public.ecr.aws/amazonlinux/amazonlinux:latest + imagePullPolicy: IfNotPresent + command: [ "bash", "-c", "/splunk-scripts/lookup-eks-fargate-receiver-node.sh"] + securityContext: + runAsUser: 0 + env: + - name: K8S_POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: K8S_NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + volumeMounts: + - name: {{ template "splunk-otel-collector.clusterReceiverNodeDiscovererScript" . 
}} + mountPath: /splunk-scripts + - name: messages + mountPath: /splunk-messages + - mountPath: /conf + name: collector-configmap +{{- end -}} +{{- end -}} diff --git a/helm-charts/splunk-otel-collector/templates/configmap-agent.yaml b/helm-charts/splunk-otel-collector/templates/configmap-agent.yaml index 306c2b557a..56de122fd3 100644 --- a/helm-charts/splunk-otel-collector/templates/configmap-agent.yaml +++ b/helm-charts/splunk-otel-collector/templates/configmap-agent.yaml @@ -1,5 +1,8 @@ {{ $agent := fromYaml (include "splunk-otel-collector.agent" .) }} -{{ if $agent.enabled }} +{{/* +Fargate doesn't support daemonsets so never use for that platform +*/}} +{{- if and $agent.enabled (ne (include "splunk-otel-collector.distribution" .) "eks/fargate") }} apiVersion: v1 kind: ConfigMap metadata: diff --git a/helm-charts/splunk-otel-collector/templates/configmap-cluster-receiver-node-discoverer-script.yaml b/helm-charts/splunk-otel-collector/templates/configmap-cluster-receiver-node-discoverer-script.yaml new file mode 100644 index 0000000000..74456d931e --- /dev/null +++ b/helm-charts/splunk-otel-collector/templates/configmap-cluster-receiver-node-discoverer-script.yaml @@ -0,0 +1,16 @@ +{{ $clusterReceiver := fromYaml (include "splunk-otel-collector.clusterReceiver" .) }} +{{ if and $clusterReceiver.enabled (eq (include "splunk-otel-collector.metricsEnabled" .) "true") (eq (include "splunk-otel-collector.distribution" .) "eks/fargate") }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ template "splunk-otel-collector.clusterReceiverNodeDiscovererScript" . }} + labels: + {{- include "splunk-otel-collector.commonLabels" . | nindent 4 }} + app: {{ template "splunk-otel-collector.name" . }} + chart: {{ template "splunk-otel-collector.chart" . }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +data: + script: | + {{- (.Files.Get "scripts/lookup-eks-fargate-receiver-node.sh") | nindent 4 }} +{{- end }} diff --git a/helm-charts/splunk-otel-collector/templates/configmap-eks-fargate-cluster-receiver.yaml b/helm-charts/splunk-otel-collector/templates/configmap-eks-fargate-cluster-receiver.yaml new file mode 100644 index 0000000000..24695487d3 --- /dev/null +++ b/helm-charts/splunk-otel-collector/templates/configmap-eks-fargate-cluster-receiver.yaml @@ -0,0 +1,16 @@ +{{ $clusterReceiver := fromYaml (include "splunk-otel-collector.clusterReceiver" .) }} +{{ if and $clusterReceiver.enabled (eq (include "splunk-otel-collector.metricsEnabled" .) "true") (eq (include "splunk-otel-collector.distribution" .) "eks/fargate") }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ template "splunk-otel-collector.eksFargateClusterReceiverScript" . }} + labels: + {{- include "splunk-otel-collector.commonLabels" . | nindent 4 }} + app: {{ template "splunk-otel-collector.name" . }} + chart: {{ template "splunk-otel-collector.chart" . }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +data: + script: | + {{- (.Files.Get "scripts/eks-fargate-otelcol-with-env.sh") | nindent 4 }} +{{- end }} diff --git a/helm-charts/splunk-otel-collector/templates/configmap-gateway.yaml b/helm-charts/splunk-otel-collector/templates/configmap-gateway.yaml index b97fcc7fcc..a38dbb4345 100644 --- a/helm-charts/splunk-otel-collector/templates/configmap-gateway.yaml +++ b/helm-charts/splunk-otel-collector/templates/configmap-gateway.yaml @@ -1,5 +1,5 @@ {{ $gateway := fromYaml (include "splunk-otel-collector.gateway" .) 
}} -{{ if $gateway.enabled }} +{{ if or $gateway.enabled (eq (include "splunk-otel-collector.distribution" .) "eks/fargate") }} apiVersion: v1 kind: ConfigMap metadata: diff --git a/helm-charts/splunk-otel-collector/templates/daemonset.yaml b/helm-charts/splunk-otel-collector/templates/daemonset.yaml index d6279ae3dd..742a195cb8 100644 --- a/helm-charts/splunk-otel-collector/templates/daemonset.yaml +++ b/helm-charts/splunk-otel-collector/templates/daemonset.yaml @@ -1,5 +1,8 @@ {{ $agent := fromYaml (include "splunk-otel-collector.agent" .) }} -{{- if $agent.enabled }} +{{/* +Fargate doesn't support daemonsets so never use for that platform +*/}} +{{- if and $agent.enabled (ne (include "splunk-otel-collector.distribution" .) "eks/fargate") }} apiVersion: apps/v1 kind: DaemonSet metadata: diff --git a/helm-charts/splunk-otel-collector/templates/deployment-cluster-receiver.yaml b/helm-charts/splunk-otel-collector/templates/deployment-cluster-receiver.yaml index 1004068a24..c88a9e5e00 100644 --- a/helm-charts/splunk-otel-collector/templates/deployment-cluster-receiver.yaml +++ b/helm-charts/splunk-otel-collector/templates/deployment-cluster-receiver.yaml @@ -1,7 +1,12 @@ {{ $clusterReceiver := fromYaml (include "splunk-otel-collector.clusterReceiver" .) }} {{- if and $clusterReceiver.enabled (eq (include "splunk-otel-collector.metricsEnabled" .) "true") }} apiVersion: apps/v1 -kind: Deployment +{{- /* +eks/fargate distributions use a two-replica StatefulSet instead of a single-replica Deployment. +The first replica runs the k8s_cluster receiver and monitors the second replica's kubelet via its node label. +The second replica labels its own node and monitors all other fargate node kubelets except its own via the k8s_observer. +*/}} +kind: {{- if eq (include "splunk-otel-collector.distribution" .) "eks/fargate" }} StatefulSet {{- else }} Deployment {{- end }} metadata: name: {{ template "splunk-otel-collector.fullname" . }}-k8s-cluster-receiver labels: @@ -17,7 +22,11 @@ metadata: {{- toYaml $clusterReceiver.annotations | nindent 4 }} {{- end }} spec: - replicas: 1 + replicas: {{- if eq (include "splunk-otel-collector.distribution" .) "eks/fargate" }} 2 {{- else }} 1 {{- end }} + {{- if eq (include "splunk-otel-collector.distribution" .) "eks/fargate" }} + serviceName: {{ template "splunk-otel-collector.clusterReceiverServiceName" . }} + podManagementPolicy: Parallel + {{- end }} selector: matchLabels: app: {{ template "splunk-otel-collector.name" . }} @@ -60,9 +69,18 @@ spec: securityContext: {{ toYaml $clusterReceiver.securityContext | nindent 8 }} {{- end }} + {{- if (include "splunk-otel-collector.clusterReceiverInitContainers" .) }} + initContainers: + {{ include "splunk-otel-collector.clusterReceiverInitContainers" . | indent 8 }} + {{- end }} containers: - name: otel-collector command: + {{- if eq (include "splunk-otel-collector.distribution" .) "eks/fargate" }} + - bash + - -c + - "/splunk-scripts/eks-fargate-otelcol-with-env.sh --config=/splunk-messages/config.yaml --metrics-addr=0.0.0.0:8889" + {{- else }} {{- if .Values.isWindows }} - powershell.exe - -command @@ -72,6 +90,7 @@ spec: - /otelcol - --config=/conf/relay.yaml {{- end }} + {{- end }} image: {{ template "splunk-otel-collector.image.otelcol" . }} imagePullPolicy: {{ .Values.image.otelcol.pullPolicy }} env: @@ -134,6 +153,14 @@ spec: volumeMounts: - mountPath: {{ .Values.isWindows | ternary "C:\\conf" "/conf" }} name: collector-configmap + {{- if eq (include "splunk-otel-collector.distribution" .)
"eks/fargate" }} + - mountPath: /splunk-scripts + name: {{ template "splunk-otel-collector.eksFargateClusterReceiverScript" . }} + {{- end }} + {{- if eq (include "splunk-otel-collector.distribution" .) "eks/fargate" }} + - mountPath: /splunk-messages + name: messages + {{- end }} {{- if $clusterReceiver.extraVolumeMounts }} {{- toYaml $clusterReceiver.extraVolumeMounts | nindent 8 }} {{- end }} @@ -145,6 +172,26 @@ spec: items: - key: relay path: relay.yaml + {{- if eq (include "splunk-otel-collector.distribution" .) "eks/fargate" }} + - name: {{ template "splunk-otel-collector.eksFargateClusterReceiverScript" . }} + configMap: + name: {{ template "splunk-otel-collector.eksFargateClusterReceiverScript" . }} + items: + - key: script + path: eks-fargate-otelcol-with-env.sh + mode: 0777 + {{- end }} + {{- if eq (include "splunk-otel-collector.distribution" .) "eks/fargate" }} + - name: {{ template "splunk-otel-collector.clusterReceiverNodeDiscovererScript" . }} + configMap: + name: {{ template "splunk-otel-collector.clusterReceiverNodeDiscovererScript" . }} + items: + - key: script + path: lookup-eks-fargate-receiver-node.sh + mode: 0777 + - name: messages + emptyDir: {} + {{- end }} {{- if $clusterReceiver.extraVolumes }} {{- toYaml $clusterReceiver.extraVolumes | nindent 6 }} {{- end }} diff --git a/helm-charts/splunk-otel-collector/templates/service-cluster-receiver-stateful-set.yaml b/helm-charts/splunk-otel-collector/templates/service-cluster-receiver-stateful-set.yaml new file mode 100644 index 0000000000..e561f750f5 --- /dev/null +++ b/helm-charts/splunk-otel-collector/templates/service-cluster-receiver-stateful-set.yaml @@ -0,0 +1,14 @@ +{{- if eq (include "splunk-otel-collector.distribution" .) "eks/fargate" }} +--- +apiVersion: v1 +kind: Service +metadata: + name: {{ template "splunk-otel-collector.clusterReceiverServiceName" . }} + labels: + app: {{ template "splunk-otel-collector.name" . }} +spec: + clusterIP: None + selector: + app: {{ template "splunk-otel-collector.name" . }} + component: otel-k8s-cluster-receiver +{{- end }} diff --git a/helm-charts/splunk-otel-collector/values.schema.json b/helm-charts/splunk-otel-collector/values.schema.json index 0639373dd9..9eca5291ad 100644 --- a/helm-charts/splunk-otel-collector/values.schema.json +++ b/helm-charts/splunk-otel-collector/values.schema.json @@ -197,6 +197,7 @@ "type": "string", "enum": [ "eks", + "eks/fargate", "gke", "gke/autopilot", "aks", diff --git a/helm-charts/splunk-otel-collector/values.yaml b/helm-charts/splunk-otel-collector/values.yaml index d8e77875e4..6870b9dc51 100644 --- a/helm-charts/splunk-otel-collector/values.yaml +++ b/helm-charts/splunk-otel-collector/values.yaml @@ -125,10 +125,11 @@ cloudProvider: "" ################################################################################ # Kubernetes distribution being run. Leave empty for other. 
+# - "aks" (Azure Kubernetes Service) # - "eks" (Amazon Elastic Kubernetes Service) +# - "eks/fargate" (Amazon Elastic Kubernetes Service with Fargate profiles ) # - "gke" (Google Kubernetes Engine / Standard mode) # - "gke/autopilot" (Google Kubernetes Engine / Autopilot mode) -# - "aks" (Azure Kubernetes Service) # - "openshift" (RedHat OpenShift) ################################################################################ diff --git a/rendered/manifests/agent-only/configmap-cluster-receiver.yaml b/rendered/manifests/agent-only/configmap-cluster-receiver.yaml index 0050ec98de..7bb4d8dcef 100644 --- a/rendered/manifests/agent-only/configmap-cluster-receiver.yaml +++ b/rendered/manifests/agent-only/configmap-cluster-receiver.yaml @@ -36,9 +36,6 @@ data: - action: insert key: metric_source value: kubernetes - - action: insert - key: receiver - value: k8scluster - action: upsert key: k8s.cluster.name value: CHANGEME @@ -56,6 +53,11 @@ data: - action: insert key: k8s.namespace.name value: ${K8S_NAMESPACE} + resource/k8s_cluster: + attributes: + - action: insert + key: receiver + value: k8scluster resourcedetection: detectors: - env @@ -87,6 +89,7 @@ data: - memory_limiter - batch - resource + - resource/k8s_cluster receivers: - k8s_cluster metrics/collector: diff --git a/rendered/manifests/agent-only/deployment-cluster-receiver.yaml b/rendered/manifests/agent-only/deployment-cluster-receiver.yaml index 346b9b288e..e89a8abb2e 100644 --- a/rendered/manifests/agent-only/deployment-cluster-receiver.yaml +++ b/rendered/manifests/agent-only/deployment-cluster-receiver.yaml @@ -30,7 +30,7 @@ spec: component: otel-k8s-cluster-receiver release: default annotations: - checksum/config: 95fad8a7a34350631ceacc2fc8498d3ae05d1be9df4fd9e71dd6036be4fa1919 + checksum/config: fe6a43ddd66a904c3e515232f6581738fba02d015ac9e5a55b98122b120e4bf2 spec: serviceAccountName: default-splunk-otel-collector nodeSelector: diff --git a/rendered/manifests/eks-fargate/clusterRole.yaml b/rendered/manifests/eks-fargate/clusterRole.yaml new file mode 100644 index 0000000000..d993a88e65 --- /dev/null +++ b/rendered/manifests/eks-fargate/clusterRole.yaml @@ -0,0 +1,89 @@ +--- +# Source: splunk-otel-collector/templates/clusterRole.yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: default-splunk-otel-collector + labels: + app.kubernetes.io/name: splunk-otel-collector + helm.sh/chart: splunk-otel-collector-0.43.0 + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: default + app.kubernetes.io/version: "0.43.0" + app: splunk-otel-collector + chart: splunk-otel-collector-0.43.0 + release: default + heritage: Helm +rules: +- apiGroups: + - "" + resources: + - events + - namespaces + - namespaces/status + - nodes + - nodes/spec + - nodes/stats + - nodes/proxy + - pods + - pods/status + - persistentvolumeclaims + - persistentvolumes + - replicationcontrollers + - replicationcontrollers/status + - resourcequotas + - services + verbs: + - get + - list + - watch +- apiGroups: + - apps + resources: + - daemonsets + - deployments + - replicasets + - statefulsets + verbs: + - get + - list + - watch +- apiGroups: + - extensions + resources: + - daemonsets + - deployments + - replicasets + verbs: + - get + - list + - watch +- apiGroups: + - batch + resources: + - jobs + - cronjobs + verbs: + - get + - list + - watch +- apiGroups: + - autoscaling + resources: + - horizontalpodautoscalers + verbs: + - get + - list + - watch +- nonResourceURLs: + - /metrics + verbs: + - get + - list + - watch +- 
apiGroups: + - "" + resources: + - nodes + verbs: + - patch diff --git a/rendered/manifests/eks-fargate/clusterRoleBinding.yaml b/rendered/manifests/eks-fargate/clusterRoleBinding.yaml new file mode 100644 index 0000000000..bb0b10518f --- /dev/null +++ b/rendered/manifests/eks-fargate/clusterRoleBinding.yaml @@ -0,0 +1,24 @@ +--- +# Source: splunk-otel-collector/templates/clusterRoleBinding.yaml +kind: ClusterRoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: default-splunk-otel-collector + labels: + app.kubernetes.io/name: splunk-otel-collector + helm.sh/chart: splunk-otel-collector-0.43.0 + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: default + app.kubernetes.io/version: "0.43.0" + app: splunk-otel-collector + chart: splunk-otel-collector-0.43.0 + release: default + heritage: Helm +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: default-splunk-otel-collector +subjects: +- kind: ServiceAccount + name: default-splunk-otel-collector + namespace: default diff --git a/rendered/manifests/eks-fargate/configmap-cluster-receiver-node-discoverer-script.yaml b/rendered/manifests/eks-fargate/configmap-cluster-receiver-node-discoverer-script.yaml new file mode 100644 index 0000000000..0dfda5a4d2 --- /dev/null +++ b/rendered/manifests/eks-fargate/configmap-cluster-receiver-node-discoverer-script.yaml @@ -0,0 +1,69 @@ +--- +# Source: splunk-otel-collector/templates/configmap-cluster-receiver-node-discoverer-script.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: default-splunk-otel-collector-cr-node-discoverer-script + labels: + app.kubernetes.io/name: splunk-otel-collector + helm.sh/chart: splunk-otel-collector-0.43.0 + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: default + app.kubernetes.io/version: "0.43.0" + app: splunk-otel-collector + chart: splunk-otel-collector-0.43.0 + release: default + heritage: Helm +data: + script: | + #! /usr/bin/bash + set -ex + + # If we are the first pod (cluster receiver), set the kubelet stats node filter to only follow labelled nodes. + # This node label will be set by the second pod. + if [[ "${K8S_POD_NAME}" == *-0 ]]; then + echo "will configure kubelet stats receiver to follow the labelled node, as well as use the cluster receiver." + echo "export CR_KUBELET_STATS_NODE_FILTER='&& labels[\"splunk-otel-is-eks-fargate-cluster-receiver-node\"] == \"true\"'" >/splunk-messages/environ + echo "export CR_K8S_OBSERVER_OBSERVE_PODS='false'" >>/splunk-messages/environ + + cat /splunk-messages/environ + + # copy config to meet container command args + cp /conf/relay.yaml /splunk-messages/config.yaml + exit 0 + fi + + # Else we are the second pod (wide kubelet stats): label our node to be monitored by the first pod and disable the k8s_cluster receiver.
+ # Update our config to not monitor ourselves + echo "Labelling our fargate node to denote it hosts the cluster receiver" + + # download kubectl (verifying checksum) + curl -o kubectl https://amazon-eks.s3.us-west-2.amazonaws.com/1.16.15/2020-11-02/bin/linux/amd64/kubectl + curl -o kubectl.sha256 https://amazon-eks.s3.us-west-2.amazonaws.com/1.16.15/2020-11-02/bin/linux/amd64/kubectl.sha256 + ACTUAL=$(sha256sum kubectl | awk '{print $1}') + EXPECTED=$(cat kubectl.sha256 | awk '{print $1}') + if [ "${ACTUAL}" != "${EXPECTED}" ]; then + echo "will not attempt to use kubectl with unexpected sha256 (${ACTUAL} != ${EXPECTED})" + exit 1 + fi + chmod a+x kubectl + # label node + ./kubectl label nodes $K8S_NODE_NAME splunk-otel-is-eks-fargate-cluster-receiver-node=true + + echo "Disabling k8s_cluster receiver for this instance" + # download yq to strip k8s_cluster receiver + curl -L -o yq https://github.com/mikefarah/yq/releases/download/v4.16.2/yq_linux_amd64 + ACTUAL=$(sha256sum yq | awk '{print $1}') + if [ "${ACTUAL}" != "5c911c4da418ae64af5527b7ee36e77effb85de20c2ce732ed14c7f72743084d" ]; then + echo "will not attempt to use yq with unexpected sha256 (${ACTUAL} != 5c911c4da418ae64af5527b7ee36e77effb85de20c2ce732ed14c7f72743084d)" + exit 1 + fi + chmod a+x yq + # strip k8s_cluster and its pipeline + ./yq e 'del(.service.pipelines.metrics)' /conf/relay.yaml >/splunk-messages/config.yaml + ./yq e -i 'del(.receivers.k8s_cluster)' /splunk-messages/config.yaml + + # set kubelet stats to not monitor ourselves (all other kubelets) + echo "EKS kubelet stats receiver node lookup not applicable for $K8S_POD_NAME. Ensuring it won't monitor itself to avoid Fargate network limitation." + echo "export CR_KUBELET_STATS_NODE_FILTER='&& not ( name contains \"${K8S_NODE_NAME}\" )'" >/splunk-messages/environ + echo "export CR_K8S_OBSERVER_OBSERVE_PODS='true'" >>/splunk-messages/environ diff --git a/rendered/manifests/eks-fargate/configmap-cluster-receiver.yaml b/rendered/manifests/eks-fargate/configmap-cluster-receiver.yaml new file mode 100644 index 0000000000..a468742d28 --- /dev/null +++ b/rendered/manifests/eks-fargate/configmap-cluster-receiver.yaml @@ -0,0 +1,140 @@ +--- +# Source: splunk-otel-collector/templates/configmap-cluster-receiver.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: default-splunk-otel-collector-otel-k8s-cluster-receiver + labels: + app.kubernetes.io/name: splunk-otel-collector + helm.sh/chart: splunk-otel-collector-0.43.0 + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: default + app.kubernetes.io/version: "0.43.0" + app: splunk-otel-collector + chart: splunk-otel-collector-0.43.0 + release: default + heritage: Helm +data: + relay: | + exporters: + signalfx: + access_token: ${SPLUNK_OBSERVABILITY_ACCESS_TOKEN} + api_url: http://default-splunk-otel-collector:6060 + ingest_url: http://default-splunk-otel-collector:9943 + timeout: 10s + extensions: + health_check: null + k8s_observer: + auth_type: serviceAccount + observe_nodes: true + observe_pods: ${CR_K8S_OBSERVER_OBSERVE_PODS} + memory_ballast: + size_mib: ${SPLUNK_BALLAST_SIZE_MIB} + processors: + batch: null + memory_limiter: + check_interval: 2s + limit_mib: ${SPLUNK_MEMORY_LIMIT_MIB} + resource: + attributes: + - action: insert + key: metric_source + value: kubernetes + - action: upsert + key: k8s.cluster.name + value: CHANGEME + resource/add_collector_k8s: + attributes: + - action: insert + key: k8s.node.name + value: ${K8S_NODE_NAME} + - action: insert + key: k8s.pod.name + value: ${K8S_POD_NAME} + - 
action: insert + key: k8s.pod.uid + value: ${K8S_POD_UID} + - action: insert + key: k8s.namespace.name + value: ${K8S_NAMESPACE} + resource/k8s_cluster: + attributes: + - action: insert + key: receiver + value: k8scluster + resourcedetection: + detectors: + - env + - eks + - ec2 + - system + override: false + timeout: 10s + receivers: + k8s_cluster: + auth_type: serviceAccount + metadata_exporters: + - signalfx + prometheus/k8s_cluster_receiver: + config: + scrape_configs: + - job_name: otel-k8s-cluster-receiver + scrape_interval: 10s + static_configs: + - targets: + - ${K8S_POD_IP}:8889 + receiver_creator: + receivers: + kubeletstats: + config: + auth_type: serviceAccount + collection_interval: 10s + endpoint: '`endpoint`:`kubelet_endpoint_port`' + extra_metadata_labels: + - container.id + metric_groups: + - container + - pod + - node + rule: type == "k8s.node" && name contains "fargate" ${CR_KUBELET_STATS_NODE_FILTER} + watch_observers: + - k8s_observer + service: + extensions: + - health_check + - memory_ballast + - k8s_observer + pipelines: + metrics: + exporters: + - signalfx + processors: + - memory_limiter + - batch + - resource + - resource/k8s_cluster + receivers: + - k8s_cluster + metrics/collector: + exporters: + - signalfx + processors: + - memory_limiter + - batch + - resource + - resource/add_collector_k8s + - resourcedetection + receivers: + - prometheus/k8s_cluster_receiver + metrics/eks: + exporters: + - signalfx + processors: + - memory_limiter + - batch + - resource + receivers: + - receiver_creator + telemetry: + metrics: + address: 0.0.0.0:8889 diff --git a/rendered/manifests/eks-fargate/configmap-eks-fargate-cluster-receiver.yaml b/rendered/manifests/eks-fargate/configmap-eks-fargate-cluster-receiver.yaml new file mode 100644 index 0000000000..b1db1e0c64 --- /dev/null +++ b/rendered/manifests/eks-fargate/configmap-eks-fargate-cluster-receiver.yaml @@ -0,0 +1,23 @@ +--- +# Source: splunk-otel-collector/templates/configmap-eks-fargate-cluster-receiver.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: default-splunk-otel-collector-fargate-cr-script + labels: + app.kubernetes.io/name: splunk-otel-collector + helm.sh/chart: splunk-otel-collector-0.43.0 + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: default + app.kubernetes.io/version: "0.43.0" + app: splunk-otel-collector + chart: splunk-otel-collector-0.43.0 + release: default + heritage: Helm +data: + script: | + set -ex + if [ -f /splunk-messages/environ ]; then + . 
/splunk-messages/environ + fi + /otelcol "$@" diff --git a/rendered/manifests/eks-fargate/configmap-gateway.yaml b/rendered/manifests/eks-fargate/configmap-gateway.yaml new file mode 100644 index 0000000000..6c727d97a7 --- /dev/null +++ b/rendered/manifests/eks-fargate/configmap-gateway.yaml @@ -0,0 +1,192 @@ +--- +# Source: splunk-otel-collector/templates/configmap-gateway.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: default-splunk-otel-collector-otel-collector + labels: + app.kubernetes.io/name: splunk-otel-collector + helm.sh/chart: splunk-otel-collector-0.43.0 + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: default + app.kubernetes.io/version: "0.43.0" + app: splunk-otel-collector + chart: splunk-otel-collector-0.43.0 + release: default + heritage: Helm +data: + relay: | + exporters: + sapm: + access_token: ${SPLUNK_OBSERVABILITY_ACCESS_TOKEN} + endpoint: https://ingest.CHANGEME.signalfx.com/v2/trace + signalfx: + access_token: ${SPLUNK_OBSERVABILITY_ACCESS_TOKEN} + api_url: https://api.CHANGEME.signalfx.com + ingest_url: https://ingest.CHANGEME.signalfx.com + extensions: + health_check: null + http_forwarder: + egress: + endpoint: https://api.CHANGEME.signalfx.com + memory_ballast: + size_mib: ${SPLUNK_BALLAST_SIZE_MIB} + zpages: null + processors: + batch: null + filter/logs: + logs: + exclude: + resource_attributes: + - key: splunk.com/exclude + value: "true" + k8sattributes: + extract: + annotations: + - from: pod + key: splunk.com/sourcetype + - from: namespace + key: splunk.com/exclude + tag_name: splunk.com/exclude + - from: pod + key: splunk.com/exclude + tag_name: splunk.com/exclude + - from: namespace + key: splunk.com/index + tag_name: com.splunk.index + - from: pod + key: splunk.com/index + tag_name: com.splunk.index + labels: + - key: app + metadata: + - k8s.namespace.name + - k8s.node.name + - k8s.pod.name + - k8s.pod.uid + pod_association: + - from: resource_attribute + name: k8s.pod.uid + - from: resource_attribute + name: k8s.pod.ip + - from: resource_attribute + name: ip + - from: connection + - from: resource_attribute + name: host.name + memory_limiter: + check_interval: 2s + limit_mib: ${SPLUNK_MEMORY_LIMIT_MIB} + resource/add_cluster_name: + attributes: + - action: upsert + key: k8s.cluster.name + value: CHANGEME + resource/add_collector_k8s: + attributes: + - action: insert + key: k8s.node.name + value: ${K8S_NODE_NAME} + - action: insert + key: k8s.pod.name + value: ${K8S_POD_NAME} + - action: insert + key: k8s.pod.uid + value: ${K8S_POD_UID} + - action: insert + key: k8s.namespace.name + value: ${K8S_NAMESPACE} + resource/logs: + attributes: + - action: upsert + from_attribute: k8s.pod.annotations.splunk.com/sourcetype + key: com.splunk.sourcetype + - action: delete + key: k8s.pod.annotations.splunk.com/sourcetype + - action: delete + key: splunk.com/exclude + resourcedetection: + detectors: + - env + - eks + - ec2 + - system + override: false + timeout: 10s + receivers: + jaeger: + protocols: + grpc: + endpoint: 0.0.0.0:14250 + thrift_http: + endpoint: 0.0.0.0:14268 + otlp: + protocols: + grpc: + endpoint: 0.0.0.0:4317 + http: + endpoint: 0.0.0.0:4318 + prometheus/collector: + config: + scrape_configs: + - job_name: otel-collector + scrape_interval: 10s + static_configs: + - targets: + - ${K8S_POD_IP}:8889 + signalfx: + access_token_passthrough: true + endpoint: 0.0.0.0:9943 + zipkin: + endpoint: 0.0.0.0:9411 + service: + extensions: + - health_check + - memory_ballast + - zpages + - http_forwarder + pipelines:
logs/signalfx-events: + exporters: + - signalfx + processors: + - memory_limiter + - batch + receivers: + - signalfx + metrics: + exporters: + - signalfx + processors: + - memory_limiter + - batch + - resource/add_cluster_name + receivers: + - otlp + - signalfx + metrics/collector: + exporters: + - signalfx + processors: + - memory_limiter + - batch + - resource/add_cluster_name + - resource/add_collector_k8s + - resourcedetection + receivers: + - prometheus/collector + traces: + exporters: + - sapm + processors: + - memory_limiter + - batch + - k8sattributes + - resource/add_cluster_name + receivers: + - otlp + - jaeger + - zipkin + telemetry: + metrics: + address: 0.0.0.0:8889 diff --git a/rendered/manifests/eks-fargate/deployment-cluster-receiver.yaml b/rendered/manifests/eks-fargate/deployment-cluster-receiver.yaml new file mode 100644 index 0000000000..785d101e05 --- /dev/null +++ b/rendered/manifests/eks-fargate/deployment-cluster-receiver.yaml @@ -0,0 +1,143 @@ +--- +# Source: splunk-otel-collector/templates/deployment-cluster-receiver.yaml +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: default-splunk-otel-collector-k8s-cluster-receiver + labels: + app.kubernetes.io/name: splunk-otel-collector + helm.sh/chart: splunk-otel-collector-0.43.0 + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: default + app.kubernetes.io/version: "0.43.0" + app: splunk-otel-collector + component: otel-k8s-cluster-receiver + chart: splunk-otel-collector-0.43.0 + release: default + heritage: Helm + app.kubernetes.io/component: otel-k8s-cluster-receiver +spec: + replicas: 2 + serviceName: default-splunk-otel-collector-k8s-cluster-receiver + podManagementPolicy: Parallel + selector: + matchLabels: + app: splunk-otel-collector + component: otel-k8s-cluster-receiver + release: default + template: + metadata: + labels: + app: splunk-otel-collector + component: otel-k8s-cluster-receiver + release: default + annotations: + checksum/config: fcd0a843fe7773209d4ce3b119f812608b3e7aab12bf9a645329de9120eddd7d + spec: + serviceAccountName: default-splunk-otel-collector + nodeSelector: + kubernetes.io/os: linux + initContainers: + + - name: cluster-receiver-node-discoverer + image: public.ecr.aws/amazonlinux/amazonlinux:latest + imagePullPolicy: IfNotPresent + command: [ "bash", "-c", "/splunk-scripts/lookup-eks-fargate-receiver-node.sh"] + securityContext: + runAsUser: 0 + env: + - name: K8S_POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: K8S_NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + volumeMounts: + - name: default-splunk-otel-collector-cr-node-discoverer-script + mountPath: /splunk-scripts + - name: messages + mountPath: /splunk-messages + - mountPath: /conf + name: collector-configmap + containers: + - name: otel-collector + command: + - bash + - -c + - "/splunk-scripts/eks-fargate-otelcol-with-env.sh --config=/splunk-messages/config.yaml --metrics-addr=0.0.0.0:8889" + image: quay.io/signalfx/splunk-otel-collector:0.43.0 + imagePullPolicy: IfNotPresent + env: + - name: SPLUNK_MEMORY_TOTAL_MIB + value: "500" + - name: K8S_NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: K8S_POD_IP + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: status.podIP + - name: K8S_POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: K8S_POD_UID + valueFrom: + fieldRef: + fieldPath: metadata.uid + - name: K8S_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: SPLUNK_OBSERVABILITY_ACCESS_TOKEN + 
valueFrom: + secretKeyRef: + name: splunk-otel-collector + key: splunk_observability_access_token + readinessProbe: + httpGet: + path: / + port: 13133 + livenessProbe: + httpGet: + path: / + port: 13133 + resources: + limits: + cpu: 200m + memory: 500Mi + volumeMounts: + - mountPath: /conf + name: collector-configmap + - mountPath: /splunk-scripts + name: default-splunk-otel-collector-fargate-cr-script + - mountPath: /splunk-messages + name: messages + terminationGracePeriodSeconds: 600 + volumes: + - name: collector-configmap + configMap: + name: default-splunk-otel-collector-otel-k8s-cluster-receiver + items: + - key: relay + path: relay.yaml + - name: default-splunk-otel-collector-fargate-cr-script + configMap: + name: default-splunk-otel-collector-fargate-cr-script + items: + - key: script + path: eks-fargate-otelcol-with-env.sh + mode: 0777 + - name: default-splunk-otel-collector-cr-node-discoverer-script + configMap: + name: default-splunk-otel-collector-cr-node-discoverer-script + items: + - key: script + path: lookup-eks-fargate-receiver-node.sh + mode: 0777 + - name: messages + emptyDir: {} diff --git a/rendered/manifests/eks-fargate/deployment-gateway.yaml b/rendered/manifests/eks-fargate/deployment-gateway.yaml new file mode 100644 index 0000000000..bbc7325ffe --- /dev/null +++ b/rendered/manifests/eks-fargate/deployment-gateway.yaml @@ -0,0 +1,121 @@ +--- +# Source: splunk-otel-collector/templates/deployment-gateway.yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: default-splunk-otel-collector + labels: + app.kubernetes.io/name: splunk-otel-collector + helm.sh/chart: splunk-otel-collector-0.43.0 + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: default + app.kubernetes.io/version: "0.43.0" + app: splunk-otel-collector + component: otel-collector + chart: splunk-otel-collector-0.43.0 + release: default + heritage: Helm + app.kubernetes.io/component: otel-collector +spec: + replicas: 3 + selector: + matchLabels: + app: splunk-otel-collector + component: otel-collector + release: default + template: + metadata: + labels: + app: splunk-otel-collector + component: otel-collector + release: default + annotations: + checksum/config: e4244516eb9109cf1d80da5e64d70d1c465bdff47cb0adff728726175d8a87e2 + spec: + serviceAccountName: default-splunk-otel-collector + nodeSelector: + kubernetes.io/os: linux + containers: + - name: otel-collector + command: + - /otelcol + - --config=/conf/relay.yaml + image: quay.io/signalfx/splunk-otel-collector:0.43.0 + imagePullPolicy: IfNotPresent + env: + - name: SPLUNK_MEMORY_TOTAL_MIB + value: "8192" + - name: K8S_NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: K8S_POD_IP + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: status.podIP + - name: K8S_POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: K8S_POD_UID + valueFrom: + fieldRef: + fieldPath: metadata.uid + - name: K8S_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: SPLUNK_OBSERVABILITY_ACCESS_TOKEN + valueFrom: + secretKeyRef: + name: splunk-otel-collector + key: splunk_observability_access_token + ports: + - name: http-forwarder + containerPort: 6060 + protocol: TCP + - name: jaeger-grpc + containerPort: 14250 + protocol: TCP + - name: jaeger-thrift + containerPort: 14268 + protocol: TCP + - name: otlp + containerPort: 4317 + protocol: TCP + - name: otlp-http + containerPort: 4318 + protocol: TCP + - name: otlp-http-old + containerPort: 55681 + protocol: TCP + - name: signalfx + containerPort: 
9943 + protocol: TCP + - name: zipkin + containerPort: 9411 + protocol: TCP + readinessProbe: + httpGet: + path: / + port: 13133 + livenessProbe: + httpGet: + path: / + port: 13133 + resources: + limits: + cpu: 4 + memory: 8Gi + volumeMounts: + - mountPath: /conf + name: collector-configmap + terminationGracePeriodSeconds: 600 + volumes: + - name: collector-configmap + configMap: + name: default-splunk-otel-collector-otel-collector + items: + - key: relay + path: relay.yaml diff --git a/rendered/manifests/eks-fargate/secret.yaml b/rendered/manifests/eks-fargate/secret.yaml new file mode 100644 index 0000000000..6495f93518 --- /dev/null +++ b/rendered/manifests/eks-fargate/secret.yaml @@ -0,0 +1,19 @@ +--- +# Source: splunk-otel-collector/templates/secret.yaml +apiVersion: v1 +kind: Secret +metadata: + name: splunk-otel-collector + labels: + app.kubernetes.io/name: splunk-otel-collector + helm.sh/chart: splunk-otel-collector-0.43.0 + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: default + app.kubernetes.io/version: "0.43.0" + app: splunk-otel-collector + chart: splunk-otel-collector-0.43.0 + release: default + heritage: Helm +type: Opaque +data: + splunk_observability_access_token: Q0hBTkdFTUU= diff --git a/rendered/manifests/eks-fargate/service-cluster-receiver-stateful-set.yaml b/rendered/manifests/eks-fargate/service-cluster-receiver-stateful-set.yaml new file mode 100644 index 0000000000..7b4caba456 --- /dev/null +++ b/rendered/manifests/eks-fargate/service-cluster-receiver-stateful-set.yaml @@ -0,0 +1,13 @@ +--- +# Source: splunk-otel-collector/templates/service-cluster-receiver-stateful-set.yaml +apiVersion: v1 +kind: Service +metadata: + name: default-splunk-otel-collector-k8s-cluster-receiver + labels: + app: splunk-otel-collector +spec: + clusterIP: None + selector: + app: splunk-otel-collector + component: otel-k8s-cluster-receiver diff --git a/rendered/manifests/eks-fargate/service.yaml b/rendered/manifests/eks-fargate/service.yaml new file mode 100644 index 0000000000..4625227452 --- /dev/null +++ b/rendered/manifests/eks-fargate/service.yaml @@ -0,0 +1,57 @@ +--- +# Source: splunk-otel-collector/templates/service.yaml +apiVersion: v1 +kind: Service +metadata: + name: default-splunk-otel-collector + labels: + app.kubernetes.io/name: splunk-otel-collector + helm.sh/chart: splunk-otel-collector-0.43.0 + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: default + app.kubernetes.io/version: "0.43.0" + app: splunk-otel-collector + component: otel-collector + chart: splunk-otel-collector-0.43.0 + release: default + heritage: Helm + app.kubernetes.io/component: otel-collector +spec: + type: ClusterIP + ports: + - name: http-forwarder + port: 6060 + targetPort: http-forwarder + protocol: TCP + - name: jaeger-grpc + port: 14250 + targetPort: jaeger-grpc + protocol: TCP + - name: jaeger-thrift + port: 14268 + targetPort: jaeger-thrift + protocol: TCP + - name: otlp + port: 4317 + targetPort: otlp + protocol: TCP + - name: otlp-http + port: 4318 + targetPort: otlp-http + protocol: TCP + - name: otlp-http-old + port: 55681 + targetPort: otlp-http-old + protocol: TCP + - name: signalfx + port: 9943 + targetPort: signalfx + protocol: TCP + - name: zipkin + port: 9411 + targetPort: zipkin + protocol: TCP + selector: + app: splunk-otel-collector + component: otel-collector + release: default diff --git a/rendered/manifests/eks-fargate/serviceAccount.yaml b/rendered/manifests/eks-fargate/serviceAccount.yaml new file mode 100644 index 0000000000..f78e2b88a6 --- 
/dev/null +++ b/rendered/manifests/eks-fargate/serviceAccount.yaml @@ -0,0 +1,16 @@ +--- +# Source: splunk-otel-collector/templates/serviceAccount.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: default-splunk-otel-collector + labels: + app.kubernetes.io/name: splunk-otel-collector + helm.sh/chart: splunk-otel-collector-0.43.0 + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: default + app.kubernetes.io/version: "0.43.0" + app: splunk-otel-collector + chart: splunk-otel-collector-0.43.0 + release: default + heritage: Helm diff --git a/rendered/manifests/metrics-only/configmap-cluster-receiver.yaml b/rendered/manifests/metrics-only/configmap-cluster-receiver.yaml index 0050ec98de..7bb4d8dcef 100644 --- a/rendered/manifests/metrics-only/configmap-cluster-receiver.yaml +++ b/rendered/manifests/metrics-only/configmap-cluster-receiver.yaml @@ -36,9 +36,6 @@ data: - action: insert key: metric_source value: kubernetes - - action: insert - key: receiver - value: k8scluster - action: upsert key: k8s.cluster.name value: CHANGEME @@ -56,6 +53,11 @@ data: - action: insert key: k8s.namespace.name value: ${K8S_NAMESPACE} + resource/k8s_cluster: + attributes: + - action: insert + key: receiver + value: k8scluster resourcedetection: detectors: - env @@ -87,6 +89,7 @@ data: - memory_limiter - batch - resource + - resource/k8s_cluster receivers: - k8s_cluster metrics/collector: diff --git a/rendered/manifests/metrics-only/deployment-cluster-receiver.yaml b/rendered/manifests/metrics-only/deployment-cluster-receiver.yaml index 346b9b288e..e89a8abb2e 100644 --- a/rendered/manifests/metrics-only/deployment-cluster-receiver.yaml +++ b/rendered/manifests/metrics-only/deployment-cluster-receiver.yaml @@ -30,7 +30,7 @@ spec: component: otel-k8s-cluster-receiver release: default annotations: - checksum/config: 95fad8a7a34350631ceacc2fc8498d3ae05d1be9df4fd9e71dd6036be4fa1919 + checksum/config: fe6a43ddd66a904c3e515232f6581738fba02d015ac9e5a55b98122b120e4bf2 spec: serviceAccountName: default-splunk-otel-collector nodeSelector: diff --git a/rendered/manifests/otel-logs/configmap-cluster-receiver.yaml b/rendered/manifests/otel-logs/configmap-cluster-receiver.yaml index 0050ec98de..7bb4d8dcef 100644 --- a/rendered/manifests/otel-logs/configmap-cluster-receiver.yaml +++ b/rendered/manifests/otel-logs/configmap-cluster-receiver.yaml @@ -36,9 +36,6 @@ data: - action: insert key: metric_source value: kubernetes - - action: insert - key: receiver - value: k8scluster - action: upsert key: k8s.cluster.name value: CHANGEME @@ -56,6 +53,11 @@ data: - action: insert key: k8s.namespace.name value: ${K8S_NAMESPACE} + resource/k8s_cluster: + attributes: + - action: insert + key: receiver + value: k8scluster resourcedetection: detectors: - env @@ -87,6 +89,7 @@ data: - memory_limiter - batch - resource + - resource/k8s_cluster receivers: - k8s_cluster metrics/collector: diff --git a/rendered/manifests/otel-logs/deployment-cluster-receiver.yaml b/rendered/manifests/otel-logs/deployment-cluster-receiver.yaml index 346b9b288e..e89a8abb2e 100644 --- a/rendered/manifests/otel-logs/deployment-cluster-receiver.yaml +++ b/rendered/manifests/otel-logs/deployment-cluster-receiver.yaml @@ -30,7 +30,7 @@ spec: component: otel-k8s-cluster-receiver release: default annotations: - checksum/config: 95fad8a7a34350631ceacc2fc8498d3ae05d1be9df4fd9e71dd6036be4fa1919 + checksum/config: fe6a43ddd66a904c3e515232f6581738fba02d015ac9e5a55b98122b120e4bf2 spec: serviceAccountName: default-splunk-otel-collector nodeSelector: From 
799bb524117315f325e7264ff1043e8ae0ebb97e Mon Sep 17 00:00:00 2001 From: Ryan Fitzpatrick Date: Wed, 26 Jan 2022 17:15:49 +0000 Subject: [PATCH 02/14] static volume mount names and removed unnecessary conditions --- .../config/_otel-k8s-cluster-receiver-config.tpl | 2 +- .../templates/deployment-cluster-receiver.yaml | 10 +++------- .../eks-fargate/deployment-cluster-receiver.yaml | 8 ++++---- 3 files changed, 8 insertions(+), 12 deletions(-) diff --git a/helm-charts/splunk-otel-collector/templates/config/_otel-k8s-cluster-receiver-config.tpl b/helm-charts/splunk-otel-collector/templates/config/_otel-k8s-cluster-receiver-config.tpl index 21325d76b9..2045de5932 100644 --- a/helm-charts/splunk-otel-collector/templates/config/_otel-k8s-cluster-receiver-config.tpl +++ b/helm-charts/splunk-otel-collector/templates/config/_otel-k8s-cluster-receiver-config.tpl @@ -241,7 +241,7 @@ service: fieldRef: fieldPath: spec.nodeName volumeMounts: - - name: {{ template "splunk-otel-collector.clusterReceiverNodeDiscovererScript" . }} + - name: eks-fargate-node-discoverer-script mountPath: /splunk-scripts - name: messages mountPath: /splunk-messages diff --git a/helm-charts/splunk-otel-collector/templates/deployment-cluster-receiver.yaml b/helm-charts/splunk-otel-collector/templates/deployment-cluster-receiver.yaml index c88a9e5e00..c6d00ee62c 100644 --- a/helm-charts/splunk-otel-collector/templates/deployment-cluster-receiver.yaml +++ b/helm-charts/splunk-otel-collector/templates/deployment-cluster-receiver.yaml @@ -155,9 +155,7 @@ spec: name: collector-configmap {{- if eq (include "splunk-otel-collector.distribution" .) "eks/fargate" }} - mountPath: /splunk-scripts - name: {{ template "splunk-otel-collector.eksFargateClusterReceiverScript" . }} - {{- end }} - {{- if eq (include "splunk-otel-collector.distribution" .) "eks/fargate" }} + name: eks-fargate-cr-script - mountPath: /splunk-messages name: messages {{- end }} @@ -173,16 +171,14 @@ spec: - key: relay path: relay.yaml {{- if eq (include "splunk-otel-collector.distribution" .) "eks/fargate" }} - - name: {{ template "splunk-otel-collector.eksFargateClusterReceiverScript" . }} + - name: eks-fargate-cr-script configMap: name: {{ template "splunk-otel-collector.eksFargateClusterReceiverScript" . }} items: - key: script path: eks-fargate-otelcol-with-env.sh mode: 0777 - {{- end }} - {{- if eq (include "splunk-otel-collector.distribution" .) "eks/fargate" }} - - name: {{ template "splunk-otel-collector.clusterReceiverNodeDiscovererScript" . }} + - name: eks-fargate-node-discoverer-script configMap: name: {{ template "splunk-otel-collector.clusterReceiverNodeDiscovererScript" . 
}} items: diff --git a/rendered/manifests/eks-fargate/deployment-cluster-receiver.yaml b/rendered/manifests/eks-fargate/deployment-cluster-receiver.yaml index 785d101e05..486cb498b8 100644 --- a/rendered/manifests/eks-fargate/deployment-cluster-receiver.yaml +++ b/rendered/manifests/eks-fargate/deployment-cluster-receiver.yaml @@ -55,7 +55,7 @@ spec: fieldRef: fieldPath: spec.nodeName volumeMounts: - - name: default-splunk-otel-collector-cr-node-discoverer-script + - name: eks-fargate-node-discoverer-script mountPath: /splunk-scripts - name: messages mountPath: /splunk-messages @@ -114,7 +114,7 @@ spec: - mountPath: /conf name: collector-configmap - mountPath: /splunk-scripts - name: default-splunk-otel-collector-fargate-cr-script + name: eks-fargate-cr-script - mountPath: /splunk-messages name: messages terminationGracePeriodSeconds: 600 @@ -125,14 +125,14 @@ spec: items: - key: relay path: relay.yaml - - name: default-splunk-otel-collector-fargate-cr-script + - name: eks-fargate-cr-script configMap: name: default-splunk-otel-collector-fargate-cr-script items: - key: script path: eks-fargate-otelcol-with-env.sh mode: 0777 - - name: default-splunk-otel-collector-cr-node-discoverer-script + - name: eks-fargate-node-discoverer-script configMap: name: default-splunk-otel-collector-cr-node-discoverer-script items: From fe1de3188368a26cb4ddc1f7bc496f0ef2a6ec57 Mon Sep 17 00:00:00 2001 From: Ryan Fitzpatrick Date: Wed, 26 Jan 2022 18:14:05 +0000 Subject: [PATCH 03/14] hardcode kubectl sha --- .../scripts/lookup-eks-fargate-receiver-node.sh | 6 ++---- .../configmap-cluster-receiver-node-discoverer-script.yaml | 6 ++---- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/helm-charts/splunk-otel-collector/scripts/lookup-eks-fargate-receiver-node.sh b/helm-charts/splunk-otel-collector/scripts/lookup-eks-fargate-receiver-node.sh index 38c79c5de1..723befa44e 100644 --- a/helm-charts/splunk-otel-collector/scripts/lookup-eks-fargate-receiver-node.sh +++ b/helm-charts/splunk-otel-collector/scripts/lookup-eks-fargate-receiver-node.sh @@ -21,11 +21,9 @@ echo "Labelling our fargate node to denote it hosts the cluster receiver" # download kubectl (verifying checksum) curl -o kubectl https://amazon-eks.s3.us-west-2.amazonaws.com/1.16.15/2020-11-02/bin/linux/amd64/kubectl -curl -o kubectl.sha256 https://amazon-eks.s3.us-west-2.amazonaws.com/1.16.15/2020-11-02/bin/linux/amd64/kubectl.sha256 ACTUAL=$(sha256sum kubectl | awk '{print $1}') -EXPECTED=$(cat kubectl.sha256 | awk '{print $1}') -if [ "${ACTUAL}" != "${EXPECTED}" ]; then - echo "will not attempt to use kubectl with unexpected sha256 (${ACTUAL} != ${EXPECTED})" +if [ "${ACTUAL}" != "e76b2f1271a5046686e03d6c68a16a34de736cfff30c92d80c9d6d87fe3cdc6c" ]; then + echo "will not attempt to use kubectl with unexpected sha256 (${ACTUAL} != e76b2f1271a5046686e03d6c68a16a34de736cfff30c92d80c9d6d87fe3cdc6c)" exit 1 fi chmod a+x kubectl diff --git a/rendered/manifests/eks-fargate/configmap-cluster-receiver-node-discoverer-script.yaml b/rendered/manifests/eks-fargate/configmap-cluster-receiver-node-discoverer-script.yaml index 0dfda5a4d2..e669df5060 100644 --- a/rendered/manifests/eks-fargate/configmap-cluster-receiver-node-discoverer-script.yaml +++ b/rendered/manifests/eks-fargate/configmap-cluster-receiver-node-discoverer-script.yaml @@ -39,11 +39,9 @@ data: # download kubectl (verifying checksum) curl -o kubectl https://amazon-eks.s3.us-west-2.amazonaws.com/1.16.15/2020-11-02/bin/linux/amd64/kubectl - curl -o kubectl.sha256 
https://amazon-eks.s3.us-west-2.amazonaws.com/1.16.15/2020-11-02/bin/linux/amd64/kubectl.sha256 ACTUAL=$(sha256sum kubectl | awk '{print $1}') - EXPECTED=$(cat kubectl.sha256 | awk '{print $1}') - if [ "${ACTUAL}" != "${EXPECTED}" ]; then - echo "will not attempt to use kubectl with unexpected sha256 (${ACTUAL} != ${EXPECTED})" + if [ "${ACTUAL}" != "e76b2f1271a5046686e03d6c68a16a34de736cfff30c92d80c9d6d87fe3cdc6c" ]; then + echo "will not attempt to use kubectl with unexpected sha256 (${ACTUAL} != e76b2f1271a5046686e03d6c68a16a34de736cfff30c92d80c9d6d87fe3cdc6c)" exit 1 fi chmod a+x kubectl From 4d0b476d0215e92cb756db800e99d7fd7c62f050 Mon Sep 17 00:00:00 2001 From: Ryan Fitzpatrick Date: Wed, 26 Jan 2022 19:35:23 +0000 Subject: [PATCH 04/14] don't use env vars for CR logic --- .../scripts/eks-fargate-otelcol-with-env.sh | 5 --- ...h => init-eks-fargate-cluster-receiver.sh} | 39 ++++++++----------- .../_otel-k8s-cluster-receiver-config.tpl | 8 ++-- ...uster-receiver-node-discoverer-script.yaml | 2 +- ...onfigmap-eks-fargate-cluster-receiver.yaml | 16 -------- .../deployment-cluster-receiver.yaml | 21 +++------- ...uster-receiver-node-discoverer-script.yaml | 39 ++++++++----------- .../configmap-cluster-receiver.yaml | 4 +- ...onfigmap-eks-fargate-cluster-receiver.yaml | 23 ----------- .../deployment-cluster-receiver.yaml | 24 ++++-------- 10 files changed, 53 insertions(+), 128 deletions(-) delete mode 100644 helm-charts/splunk-otel-collector/scripts/eks-fargate-otelcol-with-env.sh rename helm-charts/splunk-otel-collector/scripts/{lookup-eks-fargate-receiver-node.sh => init-eks-fargate-cluster-receiver.sh} (58%) delete mode 100644 helm-charts/splunk-otel-collector/templates/configmap-eks-fargate-cluster-receiver.yaml delete mode 100644 rendered/manifests/eks-fargate/configmap-eks-fargate-cluster-receiver.yaml diff --git a/helm-charts/splunk-otel-collector/scripts/eks-fargate-otelcol-with-env.sh b/helm-charts/splunk-otel-collector/scripts/eks-fargate-otelcol-with-env.sh deleted file mode 100644 index 2b5d01afb2..0000000000 --- a/helm-charts/splunk-otel-collector/scripts/eks-fargate-otelcol-with-env.sh +++ /dev/null @@ -1,5 +0,0 @@ -set -ex -if [ -f /splunk-messages/environ ]; then - . /splunk-messages/environ -fi -/otelcol $@ diff --git a/helm-charts/splunk-otel-collector/scripts/lookup-eks-fargate-receiver-node.sh b/helm-charts/splunk-otel-collector/scripts/init-eks-fargate-cluster-receiver.sh similarity index 58% rename from helm-charts/splunk-otel-collector/scripts/lookup-eks-fargate-receiver-node.sh rename to helm-charts/splunk-otel-collector/scripts/init-eks-fargate-cluster-receiver.sh index 723befa44e..8fc33a0724 100644 --- a/helm-charts/splunk-otel-collector/scripts/lookup-eks-fargate-receiver-node.sh +++ b/helm-charts/splunk-otel-collector/scripts/init-eks-fargate-cluster-receiver.sh @@ -1,17 +1,20 @@ #! /usr/bin/bash set -ex +echo "Downloading yq" +curl -L -o yq https://github.com/mikefarah/yq/releases/download/v4.16.2/yq_linux_amd64 +ACTUAL=$(sha256sum yq | awk '{print $1}') +if [ "${ACTUAL}" != "5c911c4da418ae64af5527b7ee36e77effb85de20c2ce732ed14c7f72743084d" ]; then + echo "will not attempt to use yq with unexpected sha256 (${ACTUAL} != 5c911c4da418ae64af5527b7ee36e77effb85de20c2ce732ed14c7f72743084d)" + exit 1 +fi +chmod a+x yq + # If we are the first pod (cluster receiver), set the kubelet stats node filter to only follow labelled nodes. # This node label will be set by the second pod. 
if [[ "${K8S_POD_NAME}" == *-0 ]]; then - echo "will configure kubelet stats receiver to follow node ${FIRST_CR_REPLICA_NODE_NAME}, as well as use cluster receiver." - echo "export CR_KUBELET_STATS_NODE_FILTER='&& labels[\"splunk-otel-is-eks-fargate-cluster-receiver-node\"] == \"true\"'" >/splunk-messages/environ - echo "export CR_K8S_OBSERVER_OBSERVE_PODS='false'" >>/splunk-messages/environ - - cat /splunk-messages/environ - - # copy config to meet container command args - cp /conf/relay.yaml /splunk-messages/config.yaml + echo "will configure kubelet stats receiver to follow other StatefulSet replica's node, as well as use cluster receiver." + ./yq e '.receivers.receiver_creator.receivers.kubeletstats.rule = .receivers.receiver_creator.receivers.kubeletstats.rule + " && labels[\"splunk-otel-is-eks-fargate-cluster-receiver-node\"] == \"true\""' /conf/relay.yaml >/splunk-messages/config.yaml exit 0 fi @@ -20,10 +23,10 @@ fi echo "Labelling our fargate node to denote it hosts the cluster receiver" # download kubectl (verifying checksum) -curl -o kubectl https://amazon-eks.s3.us-west-2.amazonaws.com/1.16.15/2020-11-02/bin/linux/amd64/kubectl +curl -o kubectl https://amazon-eks.s3.us-west-2.amazonaws.com/1.20.4/2021-04-12/bin/linux/amd64/kubectl ACTUAL=$(sha256sum kubectl | awk '{print $1}') -if [ "${ACTUAL}" != "e76b2f1271a5046686e03d6c68a16a34de736cfff30c92d80c9d6d87fe3cdc6c" ]; then - echo "will not attempt to use kubectl with unexpected sha256 (${ACTUAL} != e76b2f1271a5046686e03d6c68a16a34de736cfff30c92d80c9d6d87fe3cdc6c)" +if [ "${ACTUAL}" != "e84ff8c607b2a10f635c312403f9ede40a045404957e55adcf3d663f9e32c630" ]; then + echo "will not attempt to use kubectl with unexpected sha256 (${ACTUAL} != e84ff8c607b2a10f635c312403f9ede40a045404957e55adcf3d663f9e32c630)" exit 1 fi chmod a+x kubectl @@ -31,19 +34,11 @@ chmod a+x kubectl ./kubectl label nodes $K8S_NODE_NAME splunk-otel-is-eks-fargate-cluster-receiver-node=true echo "Disabling k8s_cluster receiver for this instance" -# download yq to strip k8s_cluster receiver -curl -L -o yq https://github.com/mikefarah/yq/releases/download/v4.16.2/yq_linux_amd64 -ACTUAL=$(sha256sum yq | awk '{print $1}') -if [ "${ACTUAL}" != "5c911c4da418ae64af5527b7ee36e77effb85de20c2ce732ed14c7f72743084d" ]; then - echo "will not attempt to use yq with unexpected sha256 (${ACTUAL} != 5c911c4da418ae64af5527b7ee36e77effb85de20c2ce732ed14c7f72743084d)" - exit 1 -fi -chmod a+x yq # strip k8s_cluster and its pipeline ./yq e 'del(.service.pipelines.metrics)' /conf/relay.yaml >/splunk-messages/config.yaml ./yq e -i 'del(.receivers.k8s_cluster)' /splunk-messages/config.yaml # set kubelet stats to not monitor ourselves (all other kubelets) -echo "EKS kubelet stats receiver node lookup not applicable for $K8S_POD_NAME. Ensuring it won't monitor itself to avoid Fargate network limitation." -echo "export CR_KUBELET_STATS_NODE_FILTER='&& not ( name contains \"${K8S_NODE_NAME}\" )'" >/splunk-messages/environ -echo "export CR_K8S_OBSERVER_OBSERVE_PODS='true'" >>/splunk-messages/environ +echo "Ensuring k8s_observer-based kubeletstats receivers won't monitor own node to avoid Fargate network limitation." 
+./yq e -i '.receivers.receiver_creator.receivers.kubeletstats.rule = .receivers.receiver_creator.receivers.kubeletstats.rule + " && not ( name contains \"${K8S_NODE_NAME}\" )"' /splunk-messages/config.yaml +./yq e -i '.extensions.k8s_observer.observe_pods = true' /splunk-messages/config.yaml diff --git a/helm-charts/splunk-otel-collector/templates/config/_otel-k8s-cluster-receiver-config.tpl b/helm-charts/splunk-otel-collector/templates/config/_otel-k8s-cluster-receiver-config.tpl index 2045de5932..15289889c8 100644 --- a/helm-charts/splunk-otel-collector/templates/config/_otel-k8s-cluster-receiver-config.tpl +++ b/helm-charts/splunk-otel-collector/templates/config/_otel-k8s-cluster-receiver-config.tpl @@ -15,7 +15,7 @@ extensions: # k8s_observer w/ pod and node detection for eks/fargate deployment k8s_observer: auth_type: serviceAccount - observe_pods: ${CR_K8S_OBSERVER_OBSERVE_PODS} + observe_pods: false observe_nodes: true {{- end }} @@ -56,7 +56,7 @@ receivers: receiver_creator: receivers: kubeletstats: - rule: type == "k8s.node" && name contains "fargate" ${CR_KUBELET_STATS_NODE_FILTER} + rule: type == "k8s.node" && name contains "fargate" config: auth_type: serviceAccount collection_interval: 10s @@ -228,7 +228,7 @@ service: - name: cluster-receiver-node-discoverer image: public.ecr.aws/amazonlinux/amazonlinux:latest imagePullPolicy: IfNotPresent - command: [ "bash", "-c", "/splunk-scripts/lookup-eks-fargate-receiver-node.sh"] + command: [ "bash", "-c", "/splunk-scripts/init-eks-fargate-cluster-receiver.sh"] securityContext: runAsUser: 0 env: @@ -241,7 +241,7 @@ service: fieldRef: fieldPath: spec.nodeName volumeMounts: - - name: eks-fargate-node-discoverer-script + - name: init-eks-fargate-cluster-receiver-script mountPath: /splunk-scripts - name: messages mountPath: /splunk-messages diff --git a/helm-charts/splunk-otel-collector/templates/configmap-cluster-receiver-node-discoverer-script.yaml b/helm-charts/splunk-otel-collector/templates/configmap-cluster-receiver-node-discoverer-script.yaml index 74456d931e..175428334a 100644 --- a/helm-charts/splunk-otel-collector/templates/configmap-cluster-receiver-node-discoverer-script.yaml +++ b/helm-charts/splunk-otel-collector/templates/configmap-cluster-receiver-node-discoverer-script.yaml @@ -12,5 +12,5 @@ metadata: heritage: {{ .Release.Service }} data: script: | - {{- (.Files.Get "scripts/lookup-eks-fargate-receiver-node.sh") | nindent 4 }} + {{- (.Files.Get "scripts/init-eks-fargate-cluster-receiver.sh") | nindent 4 }} {{- end }} diff --git a/helm-charts/splunk-otel-collector/templates/configmap-eks-fargate-cluster-receiver.yaml b/helm-charts/splunk-otel-collector/templates/configmap-eks-fargate-cluster-receiver.yaml deleted file mode 100644 index 24695487d3..0000000000 --- a/helm-charts/splunk-otel-collector/templates/configmap-eks-fargate-cluster-receiver.yaml +++ /dev/null @@ -1,16 +0,0 @@ -{{ $clusterReceiver := fromYaml (include "splunk-otel-collector.clusterReceiver" .) }} -{{ if and $clusterReceiver.enabled (eq (include "splunk-otel-collector.metricsEnabled" .) "true") (eq (include "splunk-otel-collector.distribution" .) "eks/fargate") }} -apiVersion: v1 -kind: ConfigMap -metadata: - name: {{ template "splunk-otel-collector.eksFargateClusterReceiverScript" . }} - labels: - {{- include "splunk-otel-collector.commonLabels" . | nindent 4 }} - app: {{ template "splunk-otel-collector.name" . }} - chart: {{ template "splunk-otel-collector.chart" . 
}} - release: {{ .Release.Name }} - heritage: {{ .Release.Service }} -data: - script: | - {{- (.Files.Get "scripts/eks-fargate-otelcol-with-env.sh") | nindent 4 }} -{{- end }} diff --git a/helm-charts/splunk-otel-collector/templates/deployment-cluster-receiver.yaml b/helm-charts/splunk-otel-collector/templates/deployment-cluster-receiver.yaml index c6d00ee62c..6189d9b17b 100644 --- a/helm-charts/splunk-otel-collector/templates/deployment-cluster-receiver.yaml +++ b/helm-charts/splunk-otel-collector/templates/deployment-cluster-receiver.yaml @@ -76,11 +76,6 @@ spec: containers: - name: otel-collector command: - {{- if eq (include "splunk-otel-collector.distribution" .) "eks/fargate" }} - - bash - - -c - - "/splunk-scripts/eks-fargate-otelcol-with-env.sh --config=/splunk-messages/config.yaml --metrics-addr=0.0.0.0:8889" - {{- else }} {{- if .Values.isWindows }} - powershell.exe - -command @@ -88,6 +83,9 @@ spec: - --config=C:\\conf\relay.yaml {{- else }} - /otelcol + {{- if eq (include "splunk-otel-collector.distribution" .) "eks/fargate" }} + - --config=/splunk-messages/config.yaml + {{- else }} - --config=/conf/relay.yaml {{- end }} {{- end }} @@ -154,8 +152,6 @@ spec: - mountPath: {{ .Values.isWindows | ternary "C:\\conf" "/conf" }} name: collector-configmap {{- if eq (include "splunk-otel-collector.distribution" .) "eks/fargate" }} - - mountPath: /splunk-scripts - name: eks-fargate-cr-script - mountPath: /splunk-messages name: messages {{- end }} @@ -171,19 +167,12 @@ spec: - key: relay path: relay.yaml {{- if eq (include "splunk-otel-collector.distribution" .) "eks/fargate" }} - - name: eks-fargate-cr-script - configMap: - name: {{ template "splunk-otel-collector.eksFargateClusterReceiverScript" . }} - items: - - key: script - path: eks-fargate-otelcol-with-env.sh - mode: 0777 - - name: eks-fargate-node-discoverer-script + - name: init-eks-fargate-cluster-receiver-script configMap: name: {{ template "splunk-otel-collector.clusterReceiverNodeDiscovererScript" . }} items: - key: script - path: lookup-eks-fargate-receiver-node.sh + path: init-eks-fargate-cluster-receiver.sh mode: 0777 - name: messages emptyDir: {} diff --git a/rendered/manifests/eks-fargate/configmap-cluster-receiver-node-discoverer-script.yaml b/rendered/manifests/eks-fargate/configmap-cluster-receiver-node-discoverer-script.yaml index e669df5060..39a48d4914 100644 --- a/rendered/manifests/eks-fargate/configmap-cluster-receiver-node-discoverer-script.yaml +++ b/rendered/manifests/eks-fargate/configmap-cluster-receiver-node-discoverer-script.yaml @@ -19,17 +19,20 @@ data: #! /usr/bin/bash set -ex + echo "Downloading yq" + curl -L -o yq https://github.com/mikefarah/yq/releases/download/v4.16.2/yq_linux_amd64 + ACTUAL=$(sha256sum yq | awk '{print $1}') + if [ "${ACTUAL}" != "5c911c4da418ae64af5527b7ee36e77effb85de20c2ce732ed14c7f72743084d" ]; then + echo "will not attempt to use yq with unexpected sha256 (${ACTUAL} != 5c911c4da418ae64af5527b7ee36e77effb85de20c2ce732ed14c7f72743084d)" + exit 1 + fi + chmod a+x yq + # If we are the first pod (cluster receiver), set the kubelet stats node filter to only follow labelled nodes. # This node label will be set by the second pod. if [[ "${K8S_POD_NAME}" == *-0 ]]; then - echo "will configure kubelet stats receiver to follow node ${FIRST_CR_REPLICA_NODE_NAME}, as well as use cluster receiver." 
- echo "export CR_KUBELET_STATS_NODE_FILTER='&& labels[\"splunk-otel-is-eks-fargate-cluster-receiver-node\"] == \"true\"'" >/splunk-messages/environ - echo "export CR_K8S_OBSERVER_OBSERVE_PODS='false'" >>/splunk-messages/environ - - cat /splunk-messages/environ - - # copy config to meet container command args - cp /conf/relay.yaml /splunk-messages/config.yaml + echo "will configure kubelet stats receiver to follow other StatefulSet replica's node, as well as use cluster receiver." + ./yq e '.receivers.receiver_creator.receivers.kubeletstats.rule = .receivers.receiver_creator.receivers.kubeletstats.rule + " && labels[\"splunk-otel-is-eks-fargate-cluster-receiver-node\"] == \"true\""' /conf/relay.yaml >/splunk-messages/config.yaml exit 0 fi @@ -38,10 +41,10 @@ data: echo "Labelling our fargate node to denote it hosts the cluster receiver" # download kubectl (verifying checksum) - curl -o kubectl https://amazon-eks.s3.us-west-2.amazonaws.com/1.16.15/2020-11-02/bin/linux/amd64/kubectl + curl -o kubectl https://amazon-eks.s3.us-west-2.amazonaws.com/1.20.4/2021-04-12/bin/linux/amd64/kubectl ACTUAL=$(sha256sum kubectl | awk '{print $1}') - if [ "${ACTUAL}" != "e76b2f1271a5046686e03d6c68a16a34de736cfff30c92d80c9d6d87fe3cdc6c" ]; then - echo "will not attempt to use kubectl with unexpected sha256 (${ACTUAL} != e76b2f1271a5046686e03d6c68a16a34de736cfff30c92d80c9d6d87fe3cdc6c)" + if [ "${ACTUAL}" != "e84ff8c607b2a10f635c312403f9ede40a045404957e55adcf3d663f9e32c630" ]; then + echo "will not attempt to use kubectl with unexpected sha256 (${ACTUAL} != e84ff8c607b2a10f635c312403f9ede40a045404957e55adcf3d663f9e32c630)" exit 1 fi chmod a+x kubectl @@ -49,19 +52,11 @@ data: ./kubectl label nodes $K8S_NODE_NAME splunk-otel-is-eks-fargate-cluster-receiver-node=true echo "Disabling k8s_cluster receiver for this instance" - # download yq to strip k8s_cluster receiver - curl -L -o yq https://github.com/mikefarah/yq/releases/download/v4.16.2/yq_linux_amd64 - ACTUAL=$(sha256sum yq | awk '{print $1}') - if [ "${ACTUAL}" != "5c911c4da418ae64af5527b7ee36e77effb85de20c2ce732ed14c7f72743084d" ]; then - echo "will not attempt to use yq with unexpected sha256 (${ACTUAL} != 5c911c4da418ae64af5527b7ee36e77effb85de20c2ce732ed14c7f72743084d)" - exit 1 - fi - chmod a+x yq # strip k8s_cluster and its pipeline ./yq e 'del(.service.pipelines.metrics)' /conf/relay.yaml >/splunk-messages/config.yaml ./yq e -i 'del(.receivers.k8s_cluster)' /splunk-messages/config.yaml # set kubelet stats to not monitor ourselves (all other kubelets) - echo "EKS kubelet stats receiver node lookup not applicable for $K8S_POD_NAME. Ensuring it won't monitor itself to avoid Fargate network limitation." - echo "export CR_KUBELET_STATS_NODE_FILTER='&& not ( name contains \"${K8S_NODE_NAME}\" )'" >/splunk-messages/environ - echo "export CR_K8S_OBSERVER_OBSERVE_PODS='true'" >>/splunk-messages/environ + echo "Ensuring k8s_observer-based kubeletstats receivers won't monitor own node to avoid Fargate network limitation." 
+ ./yq e -i '.receivers.receiver_creator.receivers.kubeletstats.rule = .receivers.receiver_creator.receivers.kubeletstats.rule + " && not ( name contains \"${K8S_NODE_NAME}\" )"' /splunk-messages/config.yaml + ./yq e -i '.extensions.k8s_observer.observe_pods = true' /splunk-messages/config.yaml diff --git a/rendered/manifests/eks-fargate/configmap-cluster-receiver.yaml b/rendered/manifests/eks-fargate/configmap-cluster-receiver.yaml index a468742d28..1c8a6786e8 100644 --- a/rendered/manifests/eks-fargate/configmap-cluster-receiver.yaml +++ b/rendered/manifests/eks-fargate/configmap-cluster-receiver.yaml @@ -27,7 +27,7 @@ data: k8s_observer: auth_type: serviceAccount observe_nodes: true - observe_pods: ${CR_K8S_OBSERVER_OBSERVE_PODS} + observe_pods: false memory_ballast: size_mib: ${SPLUNK_BALLAST_SIZE_MIB} processors: @@ -96,7 +96,7 @@ data: - container - pod - node - rule: type == "k8s.node" && name contains "fargate" ${CR_KUBELET_STATS_NODE_FILTER} + rule: type == "k8s.node" && name contains "fargate" watch_observers: - k8s_observer service: diff --git a/rendered/manifests/eks-fargate/configmap-eks-fargate-cluster-receiver.yaml b/rendered/manifests/eks-fargate/configmap-eks-fargate-cluster-receiver.yaml deleted file mode 100644 index b1db1e0c64..0000000000 --- a/rendered/manifests/eks-fargate/configmap-eks-fargate-cluster-receiver.yaml +++ /dev/null @@ -1,23 +0,0 @@ ---- -# Source: splunk-otel-collector/templates/configmap-eks-fargate-cluster-receiver.yaml -apiVersion: v1 -kind: ConfigMap -metadata: - name: default-splunk-otel-collector-fargate-cr-script - labels: - app.kubernetes.io/name: splunk-otel-collector - helm.sh/chart: splunk-otel-collector-0.43.0 - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/instance: default - app.kubernetes.io/version: "0.43.0" - app: splunk-otel-collector - chart: splunk-otel-collector-0.43.0 - release: default - heritage: Helm -data: - script: | - set -ex - if [ -f /splunk-messages/environ ]; then - . 
/splunk-messages/environ - fi - /otelcol $@ diff --git a/rendered/manifests/eks-fargate/deployment-cluster-receiver.yaml b/rendered/manifests/eks-fargate/deployment-cluster-receiver.yaml index 486cb498b8..922b7504fd 100644 --- a/rendered/manifests/eks-fargate/deployment-cluster-receiver.yaml +++ b/rendered/manifests/eks-fargate/deployment-cluster-receiver.yaml @@ -32,7 +32,7 @@ spec: component: otel-k8s-cluster-receiver release: default annotations: - checksum/config: fcd0a843fe7773209d4ce3b119f812608b3e7aab12bf9a645329de9120eddd7d + checksum/config: 9614b18c1d29819cc4908903a9ae2d41b51e8f529fc301fd7509afa262e15be5 spec: serviceAccountName: default-splunk-otel-collector nodeSelector: @@ -42,7 +42,7 @@ spec: - name: cluster-receiver-node-discoverer image: public.ecr.aws/amazonlinux/amazonlinux:latest imagePullPolicy: IfNotPresent - command: [ "bash", "-c", "/splunk-scripts/lookup-eks-fargate-receiver-node.sh"] + command: [ "bash", "-c", "/splunk-scripts/init-eks-fargate-cluster-receiver.sh"] securityContext: runAsUser: 0 env: @@ -55,7 +55,7 @@ spec: fieldRef: fieldPath: spec.nodeName volumeMounts: - - name: eks-fargate-node-discoverer-script + - name: init-eks-fargate-cluster-receiver-script mountPath: /splunk-scripts - name: messages mountPath: /splunk-messages @@ -64,9 +64,8 @@ spec: containers: - name: otel-collector command: - - bash - - -c - - "/splunk-scripts/eks-fargate-otelcol-with-env.sh --config=/splunk-messages/config.yaml --metrics-addr=0.0.0.0:8889" + - /otelcol + - --config=/splunk-messages/config.yaml image: quay.io/signalfx/splunk-otel-collector:0.43.0 imagePullPolicy: IfNotPresent env: @@ -113,8 +112,6 @@ spec: volumeMounts: - mountPath: /conf name: collector-configmap - - mountPath: /splunk-scripts - name: eks-fargate-cr-script - mountPath: /splunk-messages name: messages terminationGracePeriodSeconds: 600 @@ -125,19 +122,12 @@ spec: items: - key: relay path: relay.yaml - - name: eks-fargate-cr-script - configMap: - name: default-splunk-otel-collector-fargate-cr-script - items: - - key: script - path: eks-fargate-otelcol-with-env.sh - mode: 0777 - - name: eks-fargate-node-discoverer-script + - name: init-eks-fargate-cluster-receiver-script configMap: name: default-splunk-otel-collector-cr-node-discoverer-script items: - key: script - path: lookup-eks-fargate-receiver-node.sh + path: init-eks-fargate-cluster-receiver.sh mode: 0777 - name: messages emptyDir: {} From 87e310805085b04ae93e3ead338cf66a7289f6fa Mon Sep 17 00:00:00 2001 From: Ryan Fitzpatrick Date: Wed, 26 Jan 2022 21:15:18 +0000 Subject: [PATCH 05/14] correct node label name --- docs/advanced-configuration.md | 17 +++++++++-------- .../init-eks-fargate-cluster-receiver.sh | 4 ++-- ...cluster-receiver-node-discoverer-script.yaml | 4 ++-- 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/docs/advanced-configuration.md b/docs/advanced-configuration.md index 091121e905..aa3853c5a3 100644 --- a/docs/advanced-configuration.md +++ b/docs/advanced-configuration.md @@ -176,17 +176,18 @@ This distribution will operate similarly to the `eks` distribution but with the running as agents must be configured manually as sidecar containers in your custom deployments. This includes any application logging services like Fluentd. We recommend setting the `gateway.enabled` to `true` and configuring your instrumented applications to report metrics, traces, and logs to the gateway's `-splunk-otel-collector` service address if no -agent instances are used in your cluster. 
Any desired agent instances that would run as a daemonset should be run as a sidecar container in your pod. +agent instances are used in your cluster. Any desired agent instances that would run as a daemonset should instead run as sidecar containers in your pods. 2. The Collector's ClusterRole for `eks/fargate` will allow the `patch` verb on `nodes` resources for the default API groups. This is to allow -the Cluster Receiver's init container to add a `splunk-otel-is-eks-fargate-cluster-receiver-node` node label for self monitoring. This label is currently -required for reporting kubelet and pod metrics for the cluster receiver StatefulSet described below. -3. The configured Cluster Receiver is deployed as a 2-replica StatefulSet and uses a +the cluster receiver's init container to add a `splunk-otel-eks-fargate-kubeletstats-receiver-node` node label for designated self monitoring. This label is +currently required for reporting kubelet and pod metrics for the complete cluster receiver StatefulSet. +3. The configured cluster receiver is deployed as a 2-replica StatefulSet and uses a [Kubernetes Observer extension](https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/main/extension/observer/k8sobserver/README.md) -that discovers the cluster's nodes and pods. It uses this to dynamically create +that discovers the cluster's nodes and, on the second replica, its pods. It uses this to dynamically create [Kubelet Stats receiver](https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/main/receiver/kubeletstatsreceiver/README.md) -instances that will report kubelet metrics for all observed Fargate nodes, distributed across replicas. The first replica will monitor all kubelets -except its own (due to an EKS/Fargate networking restriction) and the second will monitor the first replica's. This is made possible by the Fargate-specific -deployment label mentioned above. The second replica will also have a k8s_cluster receiver instance. +instances that will report kubelet metrics for all observed Fargate nodes, distributed across replicas. The first replica will monitor the +cluster with a `k8s_cluster` receiver and the second will monitor all kubelets except its own (due to an EKS/Fargate networking restriction). +The first replica's collector will monitor the second's kubelet. This is made possible by the Fargate-specific node label +mentioned above. The second replica will have the underlying `k8s_cluster` receiver instance. ## Logs collection diff --git a/helm-charts/splunk-otel-collector/scripts/init-eks-fargate-cluster-receiver.sh b/helm-charts/splunk-otel-collector/scripts/init-eks-fargate-cluster-receiver.sh index 8fc33a0724..463e85bfbc 100644 --- a/helm-charts/splunk-otel-collector/scripts/init-eks-fargate-cluster-receiver.sh +++ b/helm-charts/splunk-otel-collector/scripts/init-eks-fargate-cluster-receiver.sh @@ -14,7 +14,7 @@ chmod a+x yq # This node label will be set by the second pod. if [[ "${K8S_POD_NAME}" == *-0 ]]; then echo "will configure kubelet stats receiver to follow other StatefulSet replica's node, as well as use cluster receiver." 
- ./yq e '.receivers.receiver_creator.receivers.kubeletstats.rule = .receivers.receiver_creator.receivers.kubeletstats.rule + " && labels[\"splunk-otel-is-eks-fargate-cluster-receiver-node\"] == \"true\""' /conf/relay.yaml >/splunk-messages/config.yaml + ./yq e '.receivers.receiver_creator.receivers.kubeletstats.rule = .receivers.receiver_creator.receivers.kubeletstats.rule + " && labels[\"splunk-otel-eks-fargate-kubeletstats-receiver-node\"] == \"true\""' /conf/relay.yaml >/splunk-messages/config.yaml exit 0 fi @@ -31,7 +31,7 @@ if [ "${ACTUAL}" != "e84ff8c607b2a10f635c312403f9ede40a045404957e55adcf3d663f9e3 fi chmod a+x kubectl # label node -./kubectl label nodes $K8S_NODE_NAME splunk-otel-is-eks-fargate-cluster-receiver-node=true +./kubectl label nodes $K8S_NODE_NAME splunk-otel-eks-fargate-kubeletstats-receiver-node=true echo "Disabling k8s_cluster receiver for this instance" # strip k8s_cluster and its pipeline diff --git a/rendered/manifests/eks-fargate/configmap-cluster-receiver-node-discoverer-script.yaml b/rendered/manifests/eks-fargate/configmap-cluster-receiver-node-discoverer-script.yaml index 39a48d4914..c93e4b5389 100644 --- a/rendered/manifests/eks-fargate/configmap-cluster-receiver-node-discoverer-script.yaml +++ b/rendered/manifests/eks-fargate/configmap-cluster-receiver-node-discoverer-script.yaml @@ -32,7 +32,7 @@ data: # This node label will be set by the second pod. if [[ "${K8S_POD_NAME}" == *-0 ]]; then echo "will configure kubelet stats receiver to follow other StatefulSet replica's node, as well as use cluster receiver." - ./yq e '.receivers.receiver_creator.receivers.kubeletstats.rule = .receivers.receiver_creator.receivers.kubeletstats.rule + " && labels[\"splunk-otel-is-eks-fargate-cluster-receiver-node\"] == \"true\""' /conf/relay.yaml >/splunk-messages/config.yaml + ./yq e '.receivers.receiver_creator.receivers.kubeletstats.rule = .receivers.receiver_creator.receivers.kubeletstats.rule + " && labels[\"splunk-otel-eks-fargate-kubeletstats-receiver-node\"] == \"true\""' /conf/relay.yaml >/splunk-messages/config.yaml exit 0 fi @@ -49,7 +49,7 @@ data: fi chmod a+x kubectl # label node - ./kubectl label nodes $K8S_NODE_NAME splunk-otel-is-eks-fargate-cluster-receiver-node=true + ./kubectl label nodes $K8S_NODE_NAME splunk-otel-eks-fargate-kubeletstats-receiver-node=true echo "Disabling k8s_cluster receiver for this instance" # strip k8s_cluster and its pipeline From 7a75ed45af44b8eb0e785a8556c2d7ecfaf63c9a Mon Sep 17 00:00:00 2001 From: Ryan Fitzpatrick Date: Thu, 27 Jan 2022 22:04:38 +0000 Subject: [PATCH 06/14] reword eks/fargate differences --- docs/advanced-configuration.md | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/docs/advanced-configuration.md b/docs/advanced-configuration.md index aa3853c5a3..743c68cd1e 100644 --- a/docs/advanced-configuration.md +++ b/docs/advanced-configuration.md @@ -177,17 +177,20 @@ running as agents must be configured manually as sidecar containers in your cust logging services like Fluentd. We recommend setting the `gateway.enabled` to `true` and configuring your instrumented applications to report metrics, traces, and logs to the gateway's `-splunk-otel-collector` service address if no agent instances are used in your cluster. Any desired agent instances that would run as a daemonset should instead run as sidecar containers in your pods. -2. The Collector's ClusterRole for `eks/fargate` will allow the `patch` verb on `nodes` resources for the default API groups. 
This is to allow -the cluster receiver's init container to add a `splunk-otel-eks-fargate-kubeletstats-receiver-node` node label for designated self monitoring. This label is -currently required for reporting kubelet and pod metrics for the complete cluster receiver StatefulSet. -3. The configured cluster receiver is deployed as a 2-replica StatefulSet and uses a -[Kubernetes Observer extension](https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/main/extension/observer/k8sobserver/README.md) -that discovers the cluster's nodes and, on the second replica, its pods. It uses this to dynamically create -[Kubelet Stats receiver](https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/main/receiver/kubeletstatsreceiver/README.md) -instances that will report kubelet metrics for all observed Fargate nodes, distributed across replicas. The first replica will monitor the -cluster with a `k8s_cluster` receiver and the second will monitor all kubelets except its own (due to an EKS/Fargate networking restriction). -The first replica's collector will monitor the second's kubelet. This is made possible by the Fargate-specific node label -mentioned above. The second replica will have the underlying `k8s_cluster` receiver instance. + +2. Since Fargate nodes use a VM boundary to prevent access to host-based resources used by other pods, pods are not able to reach their own kubelet. The cluster receiver +for the Fargate distribution has two primary differences from the regular `eks` distribution to work around this limitation: + * The configured cluster receiver is deployed as a 2-replica StatefulSet instead of a Deployment and uses a + [Kubernetes Observer extension](https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/main/extension/observer/k8sobserver/README.md) + that discovers the cluster's nodes and, on the second replica, its pods. It uses this to dynamically create + [Kubelet Stats receiver](https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/main/receiver/kubeletstatsreceiver/README.md) + instances that will report kubelet metrics for all observed Fargate nodes. The first replica will monitor the cluster with a `k8s_cluster` receiver + and the second will monitor all kubelets except its own (due to an EKS/Fargate networking restriction). + The second replica will have the underlying `k8s_cluster` receiver instance. + + * The first replica's collector will monitor the second's kubelet. This is made possible by a Fargate-specific `splunk-otel-eks-fargate-kubeletstats-receiver-node` + node label. The Collector's ClusterRole for `eks/fargate` will allow the `patch` verb on `nodes` resources for the default API groups to allow the cluster + receiver's init container to add this node label for designated self monitoring.
## Logs collection From a8a51656670756b8b5e515f713a30709deee9993 Mon Sep 17 00:00:00 2001 From: Ryan Fitzpatrick Date: Mon, 31 Jan 2022 15:19:53 +0000 Subject: [PATCH 07/14] eks/fargate: observe_pods by default --- .../scripts/init-eks-fargate-cluster-receiver.sh | 2 +- .../templates/config/_otel-k8s-cluster-receiver-config.tpl | 2 +- .../configmap-cluster-receiver-node-discoverer-script.yaml | 2 +- rendered/manifests/eks-fargate/configmap-cluster-receiver.yaml | 2 +- rendered/manifests/eks-fargate/deployment-cluster-receiver.yaml | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/helm-charts/splunk-otel-collector/scripts/init-eks-fargate-cluster-receiver.sh b/helm-charts/splunk-otel-collector/scripts/init-eks-fargate-cluster-receiver.sh index 463e85bfbc..26afab176b 100644 --- a/helm-charts/splunk-otel-collector/scripts/init-eks-fargate-cluster-receiver.sh +++ b/helm-charts/splunk-otel-collector/scripts/init-eks-fargate-cluster-receiver.sh @@ -15,6 +15,7 @@ chmod a+x yq if [[ "${K8S_POD_NAME}" == *-0 ]]; then echo "will configure kubelet stats receiver to follow other StatefulSet replica's node, as well as use cluster receiver." ./yq e '.receivers.receiver_creator.receivers.kubeletstats.rule = .receivers.receiver_creator.receivers.kubeletstats.rule + " && labels[\"splunk-otel-eks-fargate-kubeletstats-receiver-node\"] == \"true\""' /conf/relay.yaml >/splunk-messages/config.yaml + ./yq e -i '.extensions.k8s_observer.observe_pods = false' /splunk-messages/config.yaml exit 0 fi @@ -41,4 +42,3 @@ echo "Disabling k8s_cluster receiver for this instance" # set kubelet stats to not monitor ourselves (all other kubelets) echo "Ensuring k8s_observer-based kubeletstats receivers won't monitor own node to avoid Fargate network limitation." ./yq e -i '.receivers.receiver_creator.receivers.kubeletstats.rule = .receivers.receiver_creator.receivers.kubeletstats.rule + " && not ( name contains \"${K8S_NODE_NAME}\" )"' /splunk-messages/config.yaml -./yq e -i '.extensions.k8s_observer.observe_pods = true' /splunk-messages/config.yaml diff --git a/helm-charts/splunk-otel-collector/templates/config/_otel-k8s-cluster-receiver-config.tpl b/helm-charts/splunk-otel-collector/templates/config/_otel-k8s-cluster-receiver-config.tpl index 15289889c8..4105eb0957 100644 --- a/helm-charts/splunk-otel-collector/templates/config/_otel-k8s-cluster-receiver-config.tpl +++ b/helm-charts/splunk-otel-collector/templates/config/_otel-k8s-cluster-receiver-config.tpl @@ -15,7 +15,7 @@ extensions: # k8s_observer w/ pod and node detection for eks/fargate deployment k8s_observer: auth_type: serviceAccount - observe_pods: false + observe_pods: true observe_nodes: true {{- end }} diff --git a/rendered/manifests/eks-fargate/configmap-cluster-receiver-node-discoverer-script.yaml b/rendered/manifests/eks-fargate/configmap-cluster-receiver-node-discoverer-script.yaml index c93e4b5389..06c2bb1435 100644 --- a/rendered/manifests/eks-fargate/configmap-cluster-receiver-node-discoverer-script.yaml +++ b/rendered/manifests/eks-fargate/configmap-cluster-receiver-node-discoverer-script.yaml @@ -33,6 +33,7 @@ data: if [[ "${K8S_POD_NAME}" == *-0 ]]; then echo "will configure kubelet stats receiver to follow other StatefulSet replica's node, as well as use cluster receiver." 
./yq e '.receivers.receiver_creator.receivers.kubeletstats.rule = .receivers.receiver_creator.receivers.kubeletstats.rule + " && labels[\"splunk-otel-eks-fargate-kubeletstats-receiver-node\"] == \"true\""' /conf/relay.yaml >/splunk-messages/config.yaml + ./yq e -i '.extensions.k8s_observer.observe_pods = false' /splunk-messages/config.yaml exit 0 fi @@ -59,4 +60,3 @@ data: # set kubelet stats to not monitor ourselves (all other kubelets) echo "Ensuring k8s_observer-based kubeletstats receivers won't monitor own node to avoid Fargate network limitation." ./yq e -i '.receivers.receiver_creator.receivers.kubeletstats.rule = .receivers.receiver_creator.receivers.kubeletstats.rule + " && not ( name contains \"${K8S_NODE_NAME}\" )"' /splunk-messages/config.yaml - ./yq e -i '.extensions.k8s_observer.observe_pods = true' /splunk-messages/config.yaml diff --git a/rendered/manifests/eks-fargate/configmap-cluster-receiver.yaml b/rendered/manifests/eks-fargate/configmap-cluster-receiver.yaml index 1c8a6786e8..b5ae606bdc 100644 --- a/rendered/manifests/eks-fargate/configmap-cluster-receiver.yaml +++ b/rendered/manifests/eks-fargate/configmap-cluster-receiver.yaml @@ -27,7 +27,7 @@ data: k8s_observer: auth_type: serviceAccount observe_nodes: true - observe_pods: false + observe_pods: true memory_ballast: size_mib: ${SPLUNK_BALLAST_SIZE_MIB} processors: diff --git a/rendered/manifests/eks-fargate/deployment-cluster-receiver.yaml b/rendered/manifests/eks-fargate/deployment-cluster-receiver.yaml index 922b7504fd..0ae632825b 100644 --- a/rendered/manifests/eks-fargate/deployment-cluster-receiver.yaml +++ b/rendered/manifests/eks-fargate/deployment-cluster-receiver.yaml @@ -32,7 +32,7 @@ spec: component: otel-k8s-cluster-receiver release: default annotations: - checksum/config: 9614b18c1d29819cc4908903a9ae2d41b51e8f529fc301fd7509afa262e15be5 + checksum/config: 6bae8b72d7e224b89e1decd55ee30ad1a945976c7b250db54e8bba13790c4536 spec: serviceAccountName: default-splunk-otel-collector nodeSelector: From 6c4625d56cf030da0c36e6e67d52cf54816a1d9c Mon Sep 17 00:00:00 2001 From: Ryan Fitzpatrick Date: Mon, 31 Jan 2022 15:43:11 +0000 Subject: [PATCH 08/14] remove unnecessary initContainers tpl --- .../templates/_helpers.tpl | 16 ----------- .../templates/clusterRole.yaml | 2 +- .../_otel-k8s-cluster-receiver-config.tpl | 27 ------------------- .../deployment-cluster-receiver.yaml | 25 +++++++++++++++-- .../deployment-cluster-receiver.yaml | 5 ++-- 5 files changed, 26 insertions(+), 49 deletions(-) diff --git a/helm-charts/splunk-otel-collector/templates/_helpers.tpl b/helm-charts/splunk-otel-collector/templates/_helpers.tpl index a3e6c80a8f..f6bd70f86f 100644 --- a/helm-charts/splunk-otel-collector/templates/_helpers.tpl +++ b/helm-charts/splunk-otel-collector/templates/_helpers.tpl @@ -322,19 +322,3 @@ compatibility with the old config group name: "otelK8sClusterReceiver". {{- define "splunk-otel-collector.clusterReceiverNodeDiscovererScript" -}} {{ printf "%s-cr-node-discoverer-script" ( include "splunk-otel-collector.fullname" . ) | trunc 63 | trimSuffix "-" }} {{- end -}} - -{{/* -"eksFargateClusterReceiverScript" for the eks/fargate cluster receiver statefulSet run command -*/}} -{{- define "splunk-otel-collector.eksFargateClusterReceiverScript" -}} -{{ printf "%s-fargate-cr-script" ( include "splunk-otel-collector.fullname" . 
) | trunc 63 | trimSuffix "-" }} -{{- end -}} - -{{/* -"clusterReceiverNodeDiscovererInitContainerEnabled" that's based on clusterReceiver.enabled, o11yMetricsEnabled, and eks/fargate distribution -*/}} -{{- define "splunk-otel-collector.clusterReceiverNodeDiscovererInitContainerEnabled" -}} -{{- $clusterReceiver := fromYaml (include "splunk-otel-collector.clusterReceiver" .) }} -{{- $o11yMetricsEnabled := (include "splunk-otel-collector.o11yMetricsEnabled" .) }} -{{- and (eq (toString $clusterReceiver.enabled) "true") (eq (toString $o11yMetricsEnabled) "true") (eq (include "splunk-otel-collector.distribution" .) "eks/fargate") -}} -{{- end -}} diff --git a/helm-charts/splunk-otel-collector/templates/clusterRole.yaml b/helm-charts/splunk-otel-collector/templates/clusterRole.yaml index ebb9d8ba2e..d43de509c2 100644 --- a/helm-charts/splunk-otel-collector/templates/clusterRole.yaml +++ b/helm-charts/splunk-otel-collector/templates/clusterRole.yaml @@ -86,7 +86,7 @@ rules: - get - list - watch -{{- if eq (include "splunk-otel-collector.clusterReceiverNodeDiscovererInitContainerEnabled" .) "true" }} +{{- if eq (include "splunk-otel-collector.distribution" .) "eks/fargate" }} - apiGroups: - "" resources: diff --git a/helm-charts/splunk-otel-collector/templates/config/_otel-k8s-cluster-receiver-config.tpl b/helm-charts/splunk-otel-collector/templates/config/_otel-k8s-cluster-receiver-config.tpl index 4105eb0957..478301a1e2 100644 --- a/helm-charts/splunk-otel-collector/templates/config/_otel-k8s-cluster-receiver-config.tpl +++ b/helm-charts/splunk-otel-collector/templates/config/_otel-k8s-cluster-receiver-config.tpl @@ -222,30 +222,3 @@ service: {{- end }} {{- end }} {{- end }} - -{{- define "splunk-otel-collector.clusterReceiverInitContainers" -}} -{{- if eq (include "splunk-otel-collector.distribution" .) "eks/fargate" }} -- name: cluster-receiver-node-discoverer - image: public.ecr.aws/amazonlinux/amazonlinux:latest - imagePullPolicy: IfNotPresent - command: [ "bash", "-c", "/splunk-scripts/init-eks-fargate-cluster-receiver.sh"] - securityContext: - runAsUser: 0 - env: - - name: K8S_POD_NAME - valueFrom: - fieldRef: - fieldPath: metadata.name - - name: K8S_NODE_NAME - valueFrom: - fieldRef: - fieldPath: spec.nodeName - volumeMounts: - - name: init-eks-fargate-cluster-receiver-script - mountPath: /splunk-scripts - - name: messages - mountPath: /splunk-messages - - mountPath: /conf - name: collector-configmap -{{- end -}} -{{- end -}} diff --git a/helm-charts/splunk-otel-collector/templates/deployment-cluster-receiver.yaml b/helm-charts/splunk-otel-collector/templates/deployment-cluster-receiver.yaml index 6189d9b17b..ed3fc8e213 100644 --- a/helm-charts/splunk-otel-collector/templates/deployment-cluster-receiver.yaml +++ b/helm-charts/splunk-otel-collector/templates/deployment-cluster-receiver.yaml @@ -69,9 +69,30 @@ spec: securityContext: {{ toYaml $clusterReceiver.securityContext | nindent 8 }} {{- end }} - {{- if (include "splunk-otel-collector.clusterReceiverInitContainers" .) }} + {{- if eq (include "splunk-otel-collector.distribution" .) "eks/fargate" }} initContainers: - {{ include "splunk-otel-collector.clusterReceiverInitContainers" . 
| indent 8 }} + - name: cluster-receiver-node-labeler + image: public.ecr.aws/amazonlinux/amazonlinux:latest + imagePullPolicy: IfNotPresent + command: ["bash", "-c", "/splunk-scripts/init-eks-fargate-cluster-receiver.sh"] + securityContext: + runAsUser: 0 + env: + - name: K8S_POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: K8S_NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + volumeMounts: + - name: init-eks-fargate-cluster-receiver-script + mountPath: /splunk-scripts + - name: messages + mountPath: /splunk-messages + - mountPath: /conf + name: collector-configmap {{- end }} containers: - name: otel-collector diff --git a/rendered/manifests/eks-fargate/deployment-cluster-receiver.yaml b/rendered/manifests/eks-fargate/deployment-cluster-receiver.yaml index 0ae632825b..31caa433af 100644 --- a/rendered/manifests/eks-fargate/deployment-cluster-receiver.yaml +++ b/rendered/manifests/eks-fargate/deployment-cluster-receiver.yaml @@ -38,11 +38,10 @@ spec: nodeSelector: kubernetes.io/os: linux initContainers: - - - name: cluster-receiver-node-discoverer + - name: cluster-receiver-node-labeler image: public.ecr.aws/amazonlinux/amazonlinux:latest imagePullPolicy: IfNotPresent - command: [ "bash", "-c", "/splunk-scripts/init-eks-fargate-cluster-receiver.sh"] + command: ["bash", "-c", "/splunk-scripts/init-eks-fargate-cluster-receiver.sh"] securityContext: runAsUser: 0 env: From 751061c407a558179954c26332799e3a7566b20f Mon Sep 17 00:00:00 2001 From: Ryan Fitzpatrick Date: Mon, 31 Jan 2022 15:48:53 +0000 Subject: [PATCH 09/14] remove incorrect eks/fargate doc statement --- docs/advanced-configuration.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/advanced-configuration.md b/docs/advanced-configuration.md index 743c68cd1e..ad6d614308 100644 --- a/docs/advanced-configuration.md +++ b/docs/advanced-configuration.md @@ -182,11 +182,10 @@ for the Fargate distribution has two primary differences from the regular `eks` distribution to work around this limitation: * The configured cluster receiver is deployed as a 2-replica StatefulSet instead of a Deployment and uses a [Kubernetes Observer extension](https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/main/extension/observer/k8sobserver/README.md) - that discovers the cluster's nodes and, on the second replica, its pods. It uses this to dynamically create + that discovers the cluster's nodes and, on the second replica, its pods for user-configurable receiver creator additions. It uses this observer to dynamically create [Kubelet Stats receiver](https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/main/receiver/kubeletstatsreceiver/README.md) instances that will report kubelet metrics for all observed Fargate nodes. The first replica will monitor the cluster with a `k8s_cluster` receiver and the second will monitor all kubelets except its own (due to an EKS/Fargate networking restriction). - The second replica will have the underlying `k8s_cluster` receiver instance. * The first replica's collector will monitor the second's kubelet. This is made possible by a Fargate-specific `splunk-otel-eks-fargate-kubeletstats-receiver-node` + node label.
The Collector's ClusterRole for `eks/fargate` will allow the `patch` verb on `nodes` resources for the default API groups to allow the cluster From 2170e4e0c231eea77f785adef6e0481f316b7005 Mon Sep 17 00:00:00 2001 From: Ryan Fitzpatrick Date: Mon, 31 Jan 2022 16:04:08 +0000 Subject: [PATCH 10/14] restrict node discovery script --- .../templates/deployment-cluster-receiver.yaml | 2 +- rendered/manifests/eks-fargate/deployment-cluster-receiver.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/helm-charts/splunk-otel-collector/templates/deployment-cluster-receiver.yaml b/helm-charts/splunk-otel-collector/templates/deployment-cluster-receiver.yaml index ed3fc8e213..fbcc2b1e81 100644 --- a/helm-charts/splunk-otel-collector/templates/deployment-cluster-receiver.yaml +++ b/helm-charts/splunk-otel-collector/templates/deployment-cluster-receiver.yaml @@ -194,7 +194,7 @@ spec: items: - key: script path: init-eks-fargate-cluster-receiver.sh - mode: 0777 + mode: 0555 - name: messages emptyDir: {} {{- end }} diff --git a/rendered/manifests/eks-fargate/deployment-cluster-receiver.yaml b/rendered/manifests/eks-fargate/deployment-cluster-receiver.yaml index 31caa433af..6c682d1d78 100644 --- a/rendered/manifests/eks-fargate/deployment-cluster-receiver.yaml +++ b/rendered/manifests/eks-fargate/deployment-cluster-receiver.yaml @@ -127,6 +127,6 @@ spec: items: - key: script path: init-eks-fargate-cluster-receiver.sh - mode: 0777 + mode: 0555 - name: messages emptyDir: {} From 1a1b134209fb2570effbc8a637168460a063bae4 Mon Sep 17 00:00:00 2001 From: Ryan Fitzpatrick Date: Mon, 31 Jan 2022 18:22:19 +0000 Subject: [PATCH 11/14] Add eks/fargate CR pod anti-affinity --- .../config/_otel-k8s-cluster-receiver-config.tpl | 15 +++++++++++++++ .../templates/deployment-cluster-receiver.yaml | 4 ++-- .../eks-fargate/deployment-cluster-receiver.yaml | 10 ++++++++++ 3 files changed, 27 insertions(+), 2 deletions(-) diff --git a/helm-charts/splunk-otel-collector/templates/config/_otel-k8s-cluster-receiver-config.tpl b/helm-charts/splunk-otel-collector/templates/config/_otel-k8s-cluster-receiver-config.tpl index 478301a1e2..4c8a1a7063 100644 --- a/helm-charts/splunk-otel-collector/templates/config/_otel-k8s-cluster-receiver-config.tpl +++ b/helm-charts/splunk-otel-collector/templates/config/_otel-k8s-cluster-receiver-config.tpl @@ -222,3 +222,18 @@ service: {{- end }} {{- end }} {{- end }} + +{{/* +Pod anti-affinity to prevent eks/fargate replicas from being on same node +*/}} +{{- define "splunk-otel-collector.clusterReceiverPodAntiAffinity" -}} +podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: component + operator: In + values: + - otel-k8s-cluster-receiver + topologyKey: "kubernetes.io/hostname" +{{- end }} diff --git a/helm-charts/splunk-otel-collector/templates/deployment-cluster-receiver.yaml b/helm-charts/splunk-otel-collector/templates/deployment-cluster-receiver.yaml index fbcc2b1e81..baf8a77f82 100644 --- a/helm-charts/splunk-otel-collector/templates/deployment-cluster-receiver.yaml +++ b/helm-charts/splunk-otel-collector/templates/deployment-cluster-receiver.yaml @@ -61,9 +61,9 @@ spec: tolerations: {{ toYaml $clusterReceiver.tolerations | nindent 8 }} {{- end }} - {{- if $clusterReceiver.affinity }} + {{- if or $clusterReceiver.affinity (eq (include "splunk-otel-collector.distribution" .) 
"eks/fargate") }} affinity: - {{- toYaml $clusterReceiver.affinity | nindent 8 }} + {{- $clusterReceiver.affinity | mustMergeOverwrite (fromYaml (include "splunk-otel-collector.clusterReceiverPodAntiAffinity" .)) | toYaml | nindent 8 }} {{- end }} {{- if $clusterReceiver.securityContext }} securityContext: diff --git a/rendered/manifests/eks-fargate/deployment-cluster-receiver.yaml b/rendered/manifests/eks-fargate/deployment-cluster-receiver.yaml index 6c682d1d78..c095afcc87 100644 --- a/rendered/manifests/eks-fargate/deployment-cluster-receiver.yaml +++ b/rendered/manifests/eks-fargate/deployment-cluster-receiver.yaml @@ -37,6 +37,16 @@ spec: serviceAccountName: default-splunk-otel-collector nodeSelector: kubernetes.io/os: linux + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: component + operator: In + values: + - otel-k8s-cluster-receiver + topologyKey: kubernetes.io/hostname initContainers: - name: cluster-receiver-node-labeler image: public.ecr.aws/amazonlinux/amazonlinux:latest From c8c7c433b92de9cc0b4d34ef423da39df7c96a6d Mon Sep 17 00:00:00 2001 From: Ryan Fitzpatrick Date: Mon, 31 Jan 2022 18:27:08 +0000 Subject: [PATCH 12/14] rename node-labeler init container --- .../templates/deployment-cluster-receiver.yaml | 2 +- rendered/manifests/eks-fargate/deployment-cluster-receiver.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/helm-charts/splunk-otel-collector/templates/deployment-cluster-receiver.yaml b/helm-charts/splunk-otel-collector/templates/deployment-cluster-receiver.yaml index baf8a77f82..b5bea0890d 100644 --- a/helm-charts/splunk-otel-collector/templates/deployment-cluster-receiver.yaml +++ b/helm-charts/splunk-otel-collector/templates/deployment-cluster-receiver.yaml @@ -71,7 +71,7 @@ spec: {{- end }} {{- if eq (include "splunk-otel-collector.distribution" .) 
"eks/fargate" }} initContainers: - - name: cluster-receiver-node-labeler + - name: cluster-receiver-node-discoverer image: public.ecr.aws/amazonlinux/amazonlinux:latest imagePullPolicy: IfNotPresent command: ["bash", "-c", "/splunk-scripts/init-eks-fargate-cluster-receiver.sh"] diff --git a/rendered/manifests/eks-fargate/deployment-cluster-receiver.yaml b/rendered/manifests/eks-fargate/deployment-cluster-receiver.yaml index c095afcc87..18b7de3e7a 100644 --- a/rendered/manifests/eks-fargate/deployment-cluster-receiver.yaml +++ b/rendered/manifests/eks-fargate/deployment-cluster-receiver.yaml @@ -48,7 +48,7 @@ spec: - otel-k8s-cluster-receiver topologyKey: kubernetes.io/hostname initContainers: - - name: cluster-receiver-node-labeler + - name: cluster-receiver-node-discoverer image: public.ecr.aws/amazonlinux/amazonlinux:latest imagePullPolicy: IfNotPresent command: ["bash", "-c", "/splunk-scripts/init-eks-fargate-cluster-receiver.sh"] From 8380a9bbab6fa26c705aaf24b95f9b5bb5452b2d Mon Sep 17 00:00:00 2001 From: Ryan Fitzpatrick Date: Tue, 1 Feb 2022 15:31:50 +0000 Subject: [PATCH 13/14] readability and permission improvements --- docs/advanced-configuration.md | 4 ++-- .../config/_otel-k8s-cluster-receiver-config.tpl | 15 --------------- .../templates/configmap-gateway.yaml | 2 +- .../templates/deployment-cluster-receiver.yaml | 15 ++++++++++++--- .../eks-fargate/deployment-cluster-receiver.yaml | 2 -- 5 files changed, 15 insertions(+), 23 deletions(-) diff --git a/docs/advanced-configuration.md b/docs/advanced-configuration.md index ad6d614308..2ac91618c4 100644 --- a/docs/advanced-configuration.md +++ b/docs/advanced-configuration.md @@ -175,8 +175,8 @@ This distribution will operate similarly to the `eks` distribution but with the 1. The Collector agent daemonset is not applied since Fargate doesn't support daemonsets. Any desired Collector instances running as agents must be configured manually as sidecar containers in your custom deployments. This includes any application logging services like Fluentd. We recommend setting the `gateway.enabled` to `true` and configuring your instrumented -applications to report metrics, traces, and logs to the gateway's `-splunk-otel-collector` service address if no -agent instances are used in your cluster. Any desired agent instances that would run as a daemonset should instead run as sidecar containers in your pods. +applications to report metrics, traces, and logs to the gateway's `-splunk-otel-collector` service address. +Any desired agent instances that would run as a daemonset should instead run as sidecar containers in your pods. 2. Since Fargate nodes use a VM boundary to prevent access to host-based resources used by other pods, pods are not able to reach their own kubelet. 
From 8380a9bbab6fa26c705aaf24b95f9b5bb5452b2d Mon Sep 17 00:00:00 2001
From: Ryan Fitzpatrick
Date: Tue, 1 Feb 2022 15:31:50 +0000
Subject: [PATCH 13/14] readability and permission improvements

---
 docs/advanced-configuration.md                     |  4 ++--
 .../config/_otel-k8s-cluster-receiver-config.tpl   | 15 ---------------
 .../templates/configmap-gateway.yaml               |  2 +-
 .../templates/deployment-cluster-receiver.yaml     | 15 ++++++++++++---
 .../eks-fargate/deployment-cluster-receiver.yaml   |  2 --
 5 files changed, 15 insertions(+), 23 deletions(-)

diff --git a/docs/advanced-configuration.md b/docs/advanced-configuration.md
index ad6d614308..2ac91618c4 100644
--- a/docs/advanced-configuration.md
+++ b/docs/advanced-configuration.md
@@ -175,8 +175,8 @@ This distribution will operate similarly to the `eks` distribution but with the
 1. The Collector agent daemonset is not applied since Fargate doesn't support daemonsets. Any desired Collector instances
 running as agents must be configured manually as sidecar containers in your custom deployments. This includes any application
 logging services like Fluentd. We recommend setting the `gateway.enabled` to `true` and configuring your instrumented
-applications to report metrics, traces, and logs to the gateway's `-splunk-otel-collector` service address if no
-agent instances are used in your cluster. Any desired agent instances that would run as a daemonset should instead run as sidecar containers in your pods.
+applications to report metrics, traces, and logs to the gateway's `-splunk-otel-collector` service address.
+Any desired agent instances that would run as a daemonset should instead run as sidecar containers in your pods.
 2. Since Fargate nodes use a VM boundary to prevent access to host-based resources used by other pods, pods are not
 able to reach their own kubelet. The cluster receiver for the Fargate distribution differs from the regular `eks`
 distribution in two primary ways to work around this limitation:
diff --git a/helm-charts/splunk-otel-collector/templates/config/_otel-k8s-cluster-receiver-config.tpl b/helm-charts/splunk-otel-collector/templates/config/_otel-k8s-cluster-receiver-config.tpl
index 4c8a1a7063..478301a1e2 100644
--- a/helm-charts/splunk-otel-collector/templates/config/_otel-k8s-cluster-receiver-config.tpl
+++ b/helm-charts/splunk-otel-collector/templates/config/_otel-k8s-cluster-receiver-config.tpl
@@ -222,18 +222,3 @@ service:
 {{- end }}
 {{- end }}
 {{- end }}
-
-{{/*
-Pod anti-affinity to prevent eks/fargate replicas from being on same node
-*/}}
-{{- define "splunk-otel-collector.clusterReceiverPodAntiAffinity" -}}
-podAntiAffinity:
-  requiredDuringSchedulingIgnoredDuringExecution:
-  - labelSelector:
-      matchExpressions:
-      - key: component
-        operator: In
-        values:
-        - otel-k8s-cluster-receiver
-    topologyKey: "kubernetes.io/hostname"
-{{- end }}
diff --git a/helm-charts/splunk-otel-collector/templates/configmap-gateway.yaml b/helm-charts/splunk-otel-collector/templates/configmap-gateway.yaml
index a38dbb4345..b97fcc7fcc 100644
--- a/helm-charts/splunk-otel-collector/templates/configmap-gateway.yaml
+++ b/helm-charts/splunk-otel-collector/templates/configmap-gateway.yaml
@@ -1,5 +1,5 @@
 {{ $gateway := fromYaml (include "splunk-otel-collector.gateway" .) }}
-{{ if or $gateway.enabled (eq (include "splunk-otel-collector.distribution" .) "eks/fargate") }}
+{{ if $gateway.enabled }}
 apiVersion: v1
 kind: ConfigMap
 metadata:
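With the forced `eks/fargate` condition removed, the gateway ConfigMap now renders only when the gateway is explicitly enabled, matching the documented recommendation for this distribution. A minimal values sketch (using the `gateway.enabled` flag already exercised by the Makefile's render target) would be:

```yaml
# Minimal values for eks/fargate now that the gateway is opt-in rather than
# implied by the distribution.
distribution: eks/fargate
gateway:
  enabled: true
```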
"eks/fargate") }} affinity: - {{- $clusterReceiver.affinity | mustMergeOverwrite (fromYaml (include "splunk-otel-collector.clusterReceiverPodAntiAffinity" .)) | toYaml | nindent 8 }} + {{- $clusterReceiverPodAntiAffinity := ` + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: component + operator: In + values: + - otel-k8s-cluster-receiver + topologyKey: kubernetes.io/hostname + ` }} + {{- $clusterReceiver.affinity | mustMergeOverwrite (fromYaml $clusterReceiverPodAntiAffinity) | toYaml | nindent 8 }} {{- end }} {{- if $clusterReceiver.securityContext }} securityContext: @@ -75,8 +86,6 @@ spec: image: public.ecr.aws/amazonlinux/amazonlinux:latest imagePullPolicy: IfNotPresent command: ["bash", "-c", "/splunk-scripts/init-eks-fargate-cluster-receiver.sh"] - securityContext: - runAsUser: 0 env: - name: K8S_POD_NAME valueFrom: diff --git a/rendered/manifests/eks-fargate/deployment-cluster-receiver.yaml b/rendered/manifests/eks-fargate/deployment-cluster-receiver.yaml index 18b7de3e7a..1982419d0a 100644 --- a/rendered/manifests/eks-fargate/deployment-cluster-receiver.yaml +++ b/rendered/manifests/eks-fargate/deployment-cluster-receiver.yaml @@ -52,8 +52,6 @@ spec: image: public.ecr.aws/amazonlinux/amazonlinux:latest imagePullPolicy: IfNotPresent command: ["bash", "-c", "/splunk-scripts/init-eks-fargate-cluster-receiver.sh"] - securityContext: - runAsUser: 0 env: - name: K8S_POD_NAME valueFrom: From 8832c76468bc5dedc5e4c8ff63223a7073926972 Mon Sep 17 00:00:00 2001 From: Ryan Fitzpatrick Date: Tue, 1 Feb 2022 16:53:36 +0000 Subject: [PATCH 14/14] changelog --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index d3a27ecc70..d5be84c1ff 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). ## Unreleased +### Added + +- Add `eks/fargate` distribution option for 2-replica StatefulSet (#346) + ## [0.43.0] - 2022-01-27 ### Changed