diff --git a/.github/workflows/kube-stack-test.yaml b/.github/workflows/kube-stack-test.yaml index 4428b0f5e..18a588e20 100644 --- a/.github/workflows/kube-stack-test.yaml +++ b/.github/workflows/kube-stack-test.yaml @@ -21,11 +21,10 @@ jobs: create-kind-cluster: "true" - # We'll need this eventually, but for now leave it commented. - # - name: Install cert-manager - # run: | - # kubectl apply -f https://github.com/jetstack/cert-manager/releases/download/v1.6.1/cert-manager.yaml - # kubectl wait --timeout=5m --for=condition=available deployment cert-manager -n cert-manager - # kubectl wait --timeout=5m --for=condition=available deployment cert-manager-webhook -n cert-manager + - name: Install cert-manager + run: | + kubectl apply -f https://github.com/jetstack/cert-manager/releases/download/v1.6.1/cert-manager.yaml + kubectl wait --timeout=5m --for=condition=available deployment cert-manager -n cert-manager + kubectl wait --timeout=5m --for=condition=available deployment cert-manager-webhook -n cert-manager - name: Run chart-testing (install) run: ct install --charts charts/opentelemetry-kube-stack diff --git a/charts/opentelemetry-kube-stack/Chart.lock b/charts/opentelemetry-kube-stack/Chart.lock index 1c790c581..6965cae03 100644 --- a/charts/opentelemetry-kube-stack/Chart.lock +++ b/charts/opentelemetry-kube-stack/Chart.lock @@ -4,6 +4,6 @@ dependencies: version: 0.0.0 - name: opentelemetry-operator repository: https://open-telemetry.github.io/opentelemetry-helm-charts - version: 0.61.0 -digest: sha256:0eedb0c3014ffbab4516d2ef28e1254e699daa4f64980033783e44f2b5ceed3e -generated: "2024-06-05T15:15:04.402697-04:00" + version: 0.63.2 +digest: sha256:245083890bfa77a68106cde7bb1227fce41e60a7cdbfd6c589d6b2038e8a7dd2 +generated: "2024-07-01T14:30:17.623074-04:00" diff --git a/charts/opentelemetry-kube-stack/Chart.yaml b/charts/opentelemetry-kube-stack/Chart.yaml index 0b453687b..de329ac91 100644 --- a/charts/opentelemetry-kube-stack/Chart.yaml +++ b/charts/opentelemetry-kube-stack/Chart.yaml @@ -1,6 +1,6 @@ apiVersion: v2 name: opentelemetry-kube-stack -version: 0.0.6 +version: 0.0.7 description: | OpenTelemetry Quickstart chart for Kubernetes. Installs an operator and collector for an easy way to get started with Kubernetes observability. @@ -13,12 +13,14 @@ maintainers: - name: dmitryax - name: TylerHelmuth icon: https://raw.githubusercontent.com/cncf/artwork/a718fa97fffec1b9fd14147682e9e3ac0c8817cb/projects/opentelemetry/icon/color/opentelemetry-icon-color.png -appVersion: 0.101.0 +# The appVersion stays aligned with the operator's latest release. If the collector has a newer +# patch release, the collector image tag is manually overridden to that patch. +appVersion: 0.103.0 dependencies: - name: crds version: "0.0.0" condition: crds.install - name: opentelemetry-operator repository: https://open-telemetry.github.io/opentelemetry-helm-charts - version: 0.61.0 + version: 0.63.2 condition: opentelemetry-operator.enabled diff --git a/charts/opentelemetry-kube-stack/README.md b/charts/opentelemetry-kube-stack/README.md index 72420b709..97ebf1ec5 100644 --- a/charts/opentelemetry-kube-stack/README.md +++ b/charts/opentelemetry-kube-stack/README.md @@ -6,6 +6,35 @@ This Helm chart serves as a quickstart for OpenTelemetry in a Kubernetes environment. The chart installs an [OpenTelemetry Operator](https://github.com/open-telemetry/opentelemetry-operator) and a suite of collectors that help you get started with OpenTelemetry metrics, traces, and logs.
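+A minimal installation — a sketch assuming the chart is consumed from the standard open-telemetry Helm repository listed in Chart.lock — might look like: + +``` +helm repo add open-telemetry https://open-telemetry.github.io/opentelemetry-helm-charts +helm install opentelemetry-kube-stack open-telemetry/opentelemetry-kube-stack +``` +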
+## Features + +This chart installs the OpenTelemetry Operator and two collector pools with the following features: +* Daemonset collector + * Kubernetes infrastructure metrics + * Application logs + * OTLP trace receiver + * Kubernetes resource enrichment +* Standalone collector + * Kubernetes events + * Cluster metrics + +## Usage + +For example usage of this chart, see the examples/ folder, which shows how to set a custom OTLP exporter for your desired destination. The example configuration also shows how to enable Instrumentation and OpAMP Bridge resources. + +### Image versioning + +The chart's appVersion is aligned with the latest image version of the operator. Images are upgraded within the chart manually by setting each image tag to its latest release, which will be the latest patch release for the chart's appVersion. For example: +``` +appVersion: 0.103.0 +collector.image.tag: 0.103.1 +bridge.image.tag: 0.103.0 +``` + +### scrape_configs_file Details + +By default, the daemonset collector loads the `daemon_scrape_configs.yaml` file, which collects Prometheus metrics from applications on the same node that have the `prometheus.io/scrape=true` annotation, along with Kubernetes node metrics and cAdvisor metrics. Users can disable this by setting `collectors.daemon.scrape_configs_file: ""`, or they can provide their own Prometheus scrape config file for the daemonset by supplying `collectors.daemon.scrape_configs_file: "<filename>.yaml"`. + ## Prerequisites - Kubernetes 1.24+ is required for OpenTelemetry Operator installation diff --git a/charts/opentelemetry-kube-stack/daemon_scrape_configs.yaml b/charts/opentelemetry-kube-stack/daemon_scrape_configs.yaml new file mode 100644 index 000000000..8754c0bb4 --- /dev/null +++ b/charts/opentelemetry-kube-stack/daemon_scrape_configs.yaml @@ -0,0 +1,176 @@ +# Collect all metrics from pods on the daemon set's node with at least this annotation +# prometheus.io/scrape: 'true' +# This can be further customized by setting the following annotations: +# prometheus.io/scheme: 'https' +# prometheus.io/path: '/data/metrics' +# prometheus.io/port: '80' +- job_name: kubernetes-pods + scrape_interval: 30s + kubernetes_sd_configs: + - role: pod + selectors: + - role: pod + # only scrape data from pods running on the same node as the collector + field: "spec.nodeName=$OTEL_K8S_NODE_NAME" + relabel_configs: + - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape] + action: keep + regex: true + - source_labels: + [__meta_kubernetes_pod_annotation_prometheus_io_scrape_slow] + action: drop + regex: true + - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scheme] + action: replace + regex: (https?) + target_label: __scheme__ + - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path] + action: replace + target_label: __metrics_path__ + regex: (.+) + - source_labels: + [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port] + action: replace + regex: ([^:]+)(?::\d+)?;(\d+) + # NOTE: the otel collector uses env var replacement; $$ is used as a literal $.
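+      #       e.g. with prometheus.io/port: '9090' and an __address__ of "10.1.2.3:8080", +      #       the joined value "10.1.2.3:8080;9090" is rewritten to "10.1.2.3:9090" +      #       ($$1 captures the host, $$2 the annotated port).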
+ replacement: $$1:$$2 + target_label: __address__ + - action: labelmap + regex: __meta_kubernetes_pod_annotation_prometheus_io_param_(.+) + replacement: __param_$$1 + - action: labelmap + regex: __meta_kubernetes_pod_label_(.+) + - source_labels: [__meta_kubernetes_namespace] + action: replace + target_label: namespace + - source_labels: [__meta_kubernetes_pod_name] + action: replace + target_label: pod + - source_labels: [__meta_kubernetes_pod_phase] + regex: Pending|Succeeded|Failed|Completed + action: drop + - action: replace + source_labels: [__meta_kubernetes_pod_label_app_kubernetes_io_name] + target_label: job +# This job is set up to scrape the node metrics on the same host as the daemonset +# https://github.com/open-telemetry/opentelemetry-collector-contrib/issues/29053 +- job_name: node-exporter + scrape_interval: 30s + relabel_configs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + - action: replace + regex: "(.*)" + replacement: "$1" + separator: ";" + source_labels: + - job + target_label: __tmp_prometheus_job_name + static_configs: + - targets: + - ${OTEL_K8S_NODE_IP}:9100 +# We still need to scrape the kubelet's cAdvisor, which isn't supported by any otel collector receiver +# https://github.com/open-telemetry/opentelemetry-collector-contrib/issues/29053 +- authorization: + credentials_file: "/var/run/secrets/kubernetes.io/serviceaccount/token" + type: Bearer + follow_redirects: true + honor_labels: true + honor_timestamps: true + job_name: kubelet + kubernetes_sd_configs: + - follow_redirects: true + role: node + selectors: + - role: node + # only scrape data from the node the collector is running on + field: "metadata.name=$OTEL_K8S_NODE_NAME" + metric_relabel_configs: + - action: drop + regex: container_cpu_(load_average_10s|system_seconds_total|user_seconds_total) + replacement: "$1" + separator: ";" + source_labels: + - __name__ + - action: drop + regex: container_fs_(io_current|reads_merged_total|sector_reads_total|sector_writes_total|writes_merged_total) + replacement: "$1" + separator: ";" + source_labels: + - __name__ + - action: drop + regex: container_memory_(mapped_file|swap) + replacement: "$1" + separator: ";" + source_labels: + - __name__ + - action: drop + regex: container_(file_descriptors|tasks_state|threads_max) + replacement: "$1" + separator: ";" + source_labels: + - __name__ + - action: drop + regex: container_spec.* + replacement: "$1" + separator: ";" + source_labels: + - __name__ + - action: drop + regex: ".+;" + replacement: "$1" + separator: ";" + source_labels: + - id + - pod + metrics_path: "/metrics/cadvisor" + relabel_configs: + - action: replace + regex: "(.*)" + replacement: "$1" + separator: ";" + source_labels: + - job + target_label: __tmp_prometheus_job_name + - action: replace + replacement: "kubelet" + target_label: job + - action: replace + regex: "(.*)" + replacement: "${1}" + separator: ";" + source_labels: + - __meta_kubernetes_node_name + target_label: node + - action: replace + regex: "(.*)" + replacement: https-metrics + separator: ";" + target_label: endpoint + - action: replace + regex: "(.*)" + replacement: "$1" + separator: ";" + source_labels: + - __metrics_path__ + target_label: metrics_path + - action: hashmod + modulus: 1 + regex: "(.*)" + replacement: "$1" + separator: ";" + source_labels: + - __address__ + target_label: __tmp_hash + - action: keep + regex: "$(SHARD)" + replacement: "$1" + separator: ";" + source_labels: + - __tmp_hash + scheme: https + scrape_interval: 15s + scrape_timeout: 10s
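+  # The kubelet typically serves these endpoints with a self-signed, per-node certificate, +  # so we authenticate with the service account token and skip certificate verification below. +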
tls_config: + ca_file: "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt" + insecure_skip_verify: true diff --git a/charts/opentelemetry-kube-stack/examples/cloud-demo/rendered/bridge.yaml b/charts/opentelemetry-kube-stack/examples/cloud-demo/rendered/bridge.yaml new file mode 100644 index 000000000..50094f1e6 --- /dev/null +++ b/charts/opentelemetry-kube-stack/examples/cloud-demo/rendered/bridge.yaml @@ -0,0 +1,62 @@ +--- +# Source: opentelemetry-kube-stack/templates/bridge.yaml +apiVersion: opentelemetry.io/v1alpha1 +kind: OpAMPBridge +metadata: + name: example + labels: + helm.sh/chart: opentelemetry-kube-stack-0.0.7 + app.kubernetes.io/version: "0.103.0" + app.kubernetes.io/managed-by: Helm + annotations: + "helm.sh/hook": post-install,post-upgrade + "helm.sh/hook-delete-policy": hook-failed +spec: + endpoint: http://opamp-server:8080 + capabilities: + AcceptsOpAMPConnectionSettings: true + AcceptsOtherConnectionSettings: true + AcceptsRemoteConfig: true + AcceptsRestartCommand: true + ReportsEffectiveConfig: true + ReportsHealth: true + ReportsOwnLogs: true + ReportsOwnMetrics: true + ReportsOwnTraces: true + ReportsRemoteConfig: true + ReportsStatus: true + replicas: 1 + image: "ghcr.io/open-telemetry/opentelemetry-operator/operator-opamp-bridge:0.103.0" + upgradeStrategy: automatic + securityContext: + runAsNonRoot: true + runAsUser: 1000 + resources: + limits: + cpu: 250m + memory: 256Mi + requests: + cpu: 250m + memory: 256Mi + env: + - name: OTEL_K8S_NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: OTEL_K8S_NAMESPACE + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.namespace + - name: OTEL_K8S_POD_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.name + - name: OTEL_K8S_POD_IP + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: status.podIP + - name: OTEL_RESOURCE_ATTRIBUTES + value: "k8s.cluster.name=demo" diff --git a/charts/opentelemetry-kube-stack/examples/cloud-demo/rendered/clusterrole.yaml b/charts/opentelemetry-kube-stack/examples/cloud-demo/rendered/clusterrole.yaml new file mode 100644 index 000000000..5c1540fb9 --- /dev/null +++ b/charts/opentelemetry-kube-stack/examples/cloud-demo/rendered/clusterrole.yaml @@ -0,0 +1,174 @@ +--- +# Source: opentelemetry-kube-stack/templates/clusterrole.yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: example-collector +rules: +- apiGroups: [""] + resources: + - namespaces + - nodes + - nodes/proxy + - nodes/metrics + - nodes/stats + - services + - endpoints + - pods + - events + - secrets + verbs: ["get", "list", "watch"] +- apiGroups: ["monitoring.coreos.com"] + resources: + - servicemonitors + - podmonitors + verbs: ["get", "list", "watch"] +- apiGroups: + - extensions + resources: + - ingresses + verbs: ["get", "list", "watch"] +- apiGroups: + - apps + resources: + - daemonsets + - deployments + - replicasets + - statefulsets + verbs: ["get", "list", "watch"] +- apiGroups: + - networking.k8s.io + resources: + - ingresses + verbs: ["get", "list", "watch"] +- apiGroups: ["discovery.k8s.io"] + resources: + - endpointslices + verbs: ["get", "list", "watch"] +- nonResourceURLs: ["/metrics", "/metrics/cadvisor"] + verbs: ["get"] + +- apiGroups: + - "" + resources: + - events + - namespaces + - namespaces/status + - nodes + - nodes/spec + - pods + - pods/status + - replicationcontrollers + - replicationcontrollers/status + - resourcequotas + - services + verbs: + - get + - list + - watch +- apiGroups: + - apps + resources: + - daemonsets + 
- deployments + - replicasets + - statefulsets + verbs: + - get + - list + - watch +- apiGroups: + - extensions + resources: + - daemonsets + - deployments + - replicasets + verbs: + - get + - list + - watch +- apiGroups: + - batch + resources: + - jobs + - cronjobs + verbs: + - get + - list + - watch +- apiGroups: + - autoscaling + resources: + - horizontalpodautoscalers + verbs: + - get + - list + - watch +- apiGroups: ["events.k8s.io"] + resources: ["events"] + verbs: ["watch", "list"] +--- +# Source: opentelemetry-kube-stack/templates/clusterrole.yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: example-bridge +rules: + - apiGroups: + - opentelemetry.io + resources: + - opentelemetrycollectors + verbs: + - "*" + - apiGroups: + - "" + resources: + - pods + verbs: + - 'list' + - 'get' +--- +# Source: opentelemetry-kube-stack/templates/clusterrole.yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: example-cluster-stats +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: example-collector +subjects: +- kind: ServiceAccount + # quirk of the Operator + name: "example-cluster-stats-collector" + namespace: default +--- +# Source: opentelemetry-kube-stack/templates/clusterrole.yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: example-daemon +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: example-collector +subjects: +- kind: ServiceAccount + # quirk of the Operator + name: "example-daemon-collector" + namespace: default +--- +# Source: opentelemetry-kube-stack/templates/clusterrole.yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: example +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: "example-bridge" +subjects: + - kind: ServiceAccount + # quirk of the Operator + name: "example-opamp-bridge" + namespace: "default" diff --git a/charts/opentelemetry-kube-stack/examples/cloud-demo/rendered/collector.yaml b/charts/opentelemetry-kube-stack/examples/cloud-demo/rendered/collector.yaml new file mode 100644 index 000000000..7ee28978f --- /dev/null +++ b/charts/opentelemetry-kube-stack/examples/cloud-demo/rendered/collector.yaml @@ -0,0 +1,662 @@ +--- +# Source: opentelemetry-kube-stack/templates/collector.yaml +apiVersion: opentelemetry.io/v1beta1 +kind: OpenTelemetryCollector +metadata: + name: example-cluster-stats + namespace: default + labels: + helm.sh/chart: opentelemetry-kube-stack-0.0.7 + app.kubernetes.io/version: "0.103.0" + app.kubernetes.io/managed-by: Helm + opentelemetry.io/opamp-reporting: "true" + annotations: + "helm.sh/hook": post-install,post-upgrade + "helm.sh/hook-delete-policy": hook-failed +spec: + managementState: managed + mode: deployment + config: + exporters: + debug: {} + otlp: + endpoint: ingest.example.com:443 + headers: + access-token: ${ACCESS_TOKEN} + processors: + batch: + send_batch_max_size: 1500 + send_batch_size: 1000 + timeout: 1s + k8sattributes: + extract: + labels: + - from: pod + key: app.kubernetes.io/name + tag_name: service.name + - from: pod + key: k8s-app + tag_name: service.name + - from: pod + key: app.kubernetes.io/instance + tag_name: k8s.app.instance + - from: pod + key: app.kubernetes.io/version + tag_name: service.version + - from: pod + key: app.kubernetes.io/component + tag_name: k8s.app.component + metadata: + - k8s.namespace.name + - k8s.pod.name + - k8s.pod.uid + - k8s.node.name + - k8s.pod.start_time + - 
k8s.deployment.name + - k8s.replicaset.name + - k8s.replicaset.uid + - k8s.daemonset.name + - k8s.daemonset.uid + - k8s.job.name + - k8s.job.uid + - k8s.container.name + - k8s.cronjob.name + - k8s.statefulset.name + - k8s.statefulset.uid + - container.image.tag + - container.image.name + - k8s.cluster.uid + passthrough: false + pod_association: + - sources: + - from: resource_attribute + name: k8s.pod.uid + - sources: + - from: resource_attribute + name: k8s.pod.name + - from: resource_attribute + name: k8s.namespace.name + - from: resource_attribute + name: k8s.node.name + - sources: + - from: resource_attribute + name: k8s.pod.ip + - sources: + - from: resource_attribute + name: k8s.pod.name + - from: resource_attribute + name: k8s.namespace.name + - sources: + - from: connection + resourcedetection/env: + detectors: + - env + override: false + timeout: 2s + receivers: + k8s_cluster: + allocatable_types_to_report: + - cpu + - memory + - storage + auth_type: serviceAccount + collection_interval: 10s + node_conditions_to_report: + - Ready + - MemoryPressure + - DiskPressure + - NetworkUnavailable + k8sobjects: + objects: + - exclude_watch_type: + - DELETED + group: events.k8s.io + mode: watch + name: events + service: + pipelines: + logs: + exporters: + - debug + processors: + - k8sattributes + - resourcedetection/env + - batch + receivers: + - k8sobjects + metrics: + exporters: + - debug + - otlp + processors: + - k8sattributes + - resourcedetection/env + - batch + receivers: + - k8s_cluster + replicas: 1 + image: "otel/opentelemetry-collector-k8s:0.103.1" + imagePullPolicy: IfNotPresent + upgradeStrategy: automatic + hostNetwork: false + shareProcessNamespace: false + terminationGracePeriodSeconds: 30 + resources: + limits: + cpu: 100m + memory: 500Mi + requests: + cpu: 100m + memory: 500Mi + securityContext: + {} + volumeMounts: + env: + - name: OTEL_K8S_NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: OTEL_K8S_NODE_IP + valueFrom: + fieldRef: + fieldPath: status.hostIP + - name: OTEL_K8S_NAMESPACE + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.namespace + - name: OTEL_K8S_POD_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.name + - name: OTEL_K8S_POD_IP + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: status.podIP + - name: OTEL_RESOURCE_ATTRIBUTES + value: "k8s.cluster.name=demo" + + - name: ACCESS_TOKEN + valueFrom: + secretKeyRef: + key: access_token + name: otel-collector-secret + volumes: +--- +# Source: opentelemetry-kube-stack/templates/collector.yaml +apiVersion: opentelemetry.io/v1beta1 +kind: OpenTelemetryCollector +metadata: + name: example-daemon + namespace: default + labels: + helm.sh/chart: opentelemetry-kube-stack-0.0.7 + app.kubernetes.io/version: "0.103.0" + app.kubernetes.io/managed-by: Helm + opentelemetry.io/opamp-reporting: "true" + annotations: + "helm.sh/hook": post-install,post-upgrade + "helm.sh/hook-delete-policy": hook-failed +spec: + managementState: managed + mode: daemonset + config: + exporters: + debug: {} + otlp: + endpoint: ingest.example.com:443 + headers: + access-token: ${ACCESS_TOKEN} + processors: + batch: + send_batch_max_size: 1500 + send_batch_size: 1000 + timeout: 1s + k8sattributes: + extract: + labels: + - from: pod + key: app.kubernetes.io/name + tag_name: service.name + - from: pod + key: k8s-app + tag_name: service.name + - from: pod + key: app.kubernetes.io/instance + tag_name: k8s.app.instance + - from: pod + key: app.kubernetes.io/version + tag_name: service.version + 
- from: pod + key: app.kubernetes.io/component + tag_name: k8s.app.component + metadata: + - k8s.namespace.name + - k8s.pod.name + - k8s.pod.uid + - k8s.node.name + - k8s.pod.start_time + - k8s.deployment.name + - k8s.replicaset.name + - k8s.replicaset.uid + - k8s.daemonset.name + - k8s.daemonset.uid + - k8s.job.name + - k8s.job.uid + - k8s.container.name + - k8s.cronjob.name + - k8s.statefulset.name + - k8s.statefulset.uid + - container.image.tag + - container.image.name + - k8s.cluster.uid + filter: + node_from_env_var: K8S_NODE_NAME + passthrough: false + pod_association: + - sources: + - from: resource_attribute + name: k8s.pod.uid + - sources: + - from: resource_attribute + name: k8s.pod.name + - from: resource_attribute + name: k8s.namespace.name + - from: resource_attribute + name: k8s.node.name + - sources: + - from: resource_attribute + name: k8s.pod.ip + - sources: + - from: resource_attribute + name: k8s.pod.name + - from: resource_attribute + name: k8s.namespace.name + - sources: + - from: connection + resourcedetection/env: + detectors: + - env + override: false + timeout: 2s + receivers: + filelog: + exclude: [] + include: + - /var/log/pods/*/*/*.log + include_file_name: false + include_file_path: true + operators: + - id: container-parser + max_log_size: 102400 + type: container + retry_on_failure: + enabled: true + start_at: end + hostmetrics: + collection_interval: 10s + root_path: /hostfs + scrapers: + cpu: + metrics: + system.cpu.utilization: + enabled: true + disk: {} + filesystem: + exclude_fs_types: + fs_types: + - autofs + - binfmt_misc + - bpf + - cgroup2 + - configfs + - debugfs + - devpts + - devtmpfs + - fusectl + - hugetlbfs + - iso9660 + - mqueue + - nsfs + - overlay + - proc + - procfs + - pstore + - rpc_pipefs + - securityfs + - selinuxfs + - squashfs + - sysfs + - tracefs + match_type: strict + exclude_mount_points: + match_type: regexp + mount_points: + - /dev/* + - /proc/* + - /sys/* + - /run/k3s/containerd/* + - /var/lib/docker/* + - /var/lib/kubelet/* + - /snap/* + metrics: + system.filesystem.utilization: + enabled: true + load: {} + memory: + metrics: + system.memory.utilization: + enabled: true + network: {} + kubeletstats: + auth_type: serviceAccount + collection_interval: 15s + endpoint: https://${env:OTEL_K8S_NODE_IP}:10250 + extra_metadata_labels: + - container.id + - k8s.volume.type + insecure_skip_verify: true + k8s_api_config: + auth_type: serviceAccount + metric_groups: + - node + - pod + - volume + - container + metrics: + container.cpu.usage: + enabled: true + k8s.node.cpu.usage: + enabled: true + k8s.node.uptime: + enabled: true + k8s.pod.cpu.usage: + enabled: true + k8s.pod.uptime: + enabled: true + otlp: + protocols: + grpc: + endpoint: 0.0.0.0:4317 + http: + endpoint: 0.0.0.0:4318 + prometheus: + config: + scrape_configs: + - job_name: kubernetes-pods + kubernetes_sd_configs: + - role: pod + selectors: + - field: spec.nodeName=$OTEL_K8S_NODE_NAME + role: pod + relabel_configs: + - action: keep + regex: true + source_labels: + - __meta_kubernetes_pod_annotation_prometheus_io_scrape + - action: drop + regex: true + source_labels: + - __meta_kubernetes_pod_annotation_prometheus_io_scrape_slow + - action: replace + regex: (https?) 
+ source_labels: + - __meta_kubernetes_pod_annotation_prometheus_io_scheme + target_label: __scheme__ + - action: replace + regex: (.+) + source_labels: + - __meta_kubernetes_pod_annotation_prometheus_io_path + target_label: __metrics_path__ + - action: replace + regex: ([^:]+)(?::\d+)?;(\d+) + replacement: $$1:$$2 + source_labels: + - __address__ + - __meta_kubernetes_pod_annotation_prometheus_io_port + target_label: __address__ + - action: labelmap + regex: __meta_kubernetes_pod_annotation_prometheus_io_param_(.+) + replacement: __param_$$1 + - action: labelmap + regex: __meta_kubernetes_pod_label_(.+) + - action: replace + source_labels: + - __meta_kubernetes_namespace + target_label: namespace + - action: replace + source_labels: + - __meta_kubernetes_pod_name + target_label: pod + - action: drop + regex: Pending|Succeeded|Failed|Completed + source_labels: + - __meta_kubernetes_pod_phase + - action: replace + source_labels: + - __meta_kubernetes_pod_label_app_kubernetes_io_name + target_label: job + scrape_interval: 30s + - job_name: node-exporter + relabel_configs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + - action: replace + regex: (.*) + replacement: $1 + separator: ; + source_labels: + - job + target_label: __tmp_prometheus_job_name + scrape_interval: 30s + static_configs: + - targets: + - ${OTEL_K8S_NODE_IP}:9100 + - authorization: + credentials_file: /var/run/secrets/kubernetes.io/serviceaccount/token + type: Bearer + follow_redirects: true + honor_labels: true + honor_timestamps: true + job_name: kubelet + kubernetes_sd_configs: + - follow_redirects: true + role: node + selectors: + - field: metadata.name=$OTEL_K8S_NODE_NAME + role: node + metric_relabel_configs: + - action: drop + regex: container_cpu_(load_average_10s|system_seconds_total|user_seconds_total) + replacement: $1 + separator: ; + source_labels: + - __name__ + - action: drop + regex: container_fs_(io_current|reads_merged_total|sector_reads_total|sector_writes_total|writes_merged_total) + replacement: $1 + separator: ; + source_labels: + - __name__ + - action: drop + regex: container_memory_(mapped_file|swap) + replacement: $1 + separator: ; + source_labels: + - __name__ + - action: drop + regex: container_(file_descriptors|tasks_state|threads_max) + replacement: $1 + separator: ; + source_labels: + - __name__ + - action: drop + regex: container_spec.* + replacement: $1 + separator: ; + source_labels: + - __name__ + - action: drop + regex: .+; + replacement: $1 + separator: ; + source_labels: + - id + - pod + metrics_path: /metrics/cadvisor + relabel_configs: + - action: replace + regex: (.*) + replacement: $1 + separator: ; + source_labels: + - job + target_label: __tmp_prometheus_job_name + - action: replace + replacement: kubelet + target_label: job + - action: replace + regex: (.*) + replacement: ${1} + separator: ; + source_labels: + - __meta_kubernetes_node_name + target_label: node + - action: replace + regex: (.*) + replacement: https-metrics + separator: ; + target_label: endpoint + - action: replace + regex: (.*) + replacement: $1 + separator: ; + source_labels: + - __metrics_path__ + target_label: metrics_path + - action: hashmod + modulus: 1 + regex: (.*) + replacement: $1 + separator: ; + source_labels: + - __address__ + target_label: __tmp_hash + - action: keep + regex: $(SHARD) + replacement: $1 + separator: ; + source_labels: + - __tmp_hash + scheme: https + scrape_interval: 15s + scrape_timeout: 10s + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt 
+ insecure_skip_verify: true + service: + pipelines: + logs: + exporters: + - debug + - otlp + processors: + - k8sattributes + - resourcedetection/env + - batch + receivers: + - otlp + - filelog + metrics: + exporters: + - debug + - otlp + processors: + - k8sattributes + - resourcedetection/env + - batch + receivers: + - prometheus + - otlp + - hostmetrics + - kubeletstats + traces: + exporters: + - debug + - otlp + processors: + - k8sattributes + - resourcedetection/env + - batch + receivers: + - otlp + image: "otel/opentelemetry-collector-k8s:0.103.1" + imagePullPolicy: IfNotPresent + upgradeStrategy: automatic + hostNetwork: false + shareProcessNamespace: false + terminationGracePeriodSeconds: 30 + resources: + limits: + cpu: 100m + memory: 250Mi + requests: + cpu: 100m + memory: 128Mi + securityContext: + {} + volumeMounts: + - name: varlogpods + mountPath: /var/log/pods + readOnly: true + - name: varlibdockercontainers + mountPath: /var/lib/docker/containers + readOnly: true + - name: hostfs + mountPath: /hostfs + readOnly: true + mountPropagation: HostToContainer + env: + - name: OTEL_K8S_NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: OTEL_K8S_NODE_IP + valueFrom: + fieldRef: + fieldPath: status.hostIP + - name: OTEL_K8S_NAMESPACE + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.namespace + - name: OTEL_K8S_POD_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.name + - name: OTEL_K8S_POD_IP + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: status.podIP + - name: OTEL_RESOURCE_ATTRIBUTES + value: "k8s.cluster.name=demo" + + - name: ACCESS_TOKEN + valueFrom: + secretKeyRef: + key: access_token + name: otel-collector-secret + volumes: + - name: varlogpods + hostPath: + path: /var/log/pods + - name: varlibdockercontainers + hostPath: + path: /var/lib/docker/containers + - name: hostfs + hostPath: + path: / diff --git a/charts/opentelemetry-kube-stack/examples/cloud-demo/rendered/instrumentation.yaml b/charts/opentelemetry-kube-stack/examples/cloud-demo/rendered/instrumentation.yaml new file mode 100644 index 000000000..20b87b4ba --- /dev/null +++ b/charts/opentelemetry-kube-stack/examples/cloud-demo/rendered/instrumentation.yaml @@ -0,0 +1,36 @@ +--- +# Source: opentelemetry-kube-stack/templates/instrumentation.yaml +apiVersion: opentelemetry.io/v1alpha1 +kind: Instrumentation +metadata: + name: example + labels: + helm.sh/chart: opentelemetry-kube-stack-0.0.7 + app.kubernetes.io/version: "0.103.0" + app.kubernetes.io/managed-by: Helm + annotations: + "helm.sh/hook": post-install,post-upgrade + "helm.sh/hook-delete-policy": hook-failed +spec: + exporter: + endpoint: http://${OTEL_K8S_NODE_NAME}:4317 + propagators: + - tracecontext + - baggage + - b3 + - b3multi + - jaeger + - xray + - ottrace + env: + - name: OTEL_K8S_NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + resource: + addK8sUIDAttributes: true + resourceAttributes: {} + python: + env: + - name: OTEL_EXPORTER_OTLP_ENDPOINT + value: http://${OTEL_K8S_NODE_NAME}:4318 diff --git a/charts/opentelemetry-kube-stack/examples/cloud-demo/rendered/opentelemetry-operator/admission-webhooks/operator-webhook-with-cert-manager.yaml b/charts/opentelemetry-kube-stack/examples/cloud-demo/rendered/opentelemetry-operator/admission-webhooks/operator-webhook-with-cert-manager.yaml new file mode 100644 index 000000000..7775bac4f --- /dev/null +++ 
b/charts/opentelemetry-kube-stack/examples/cloud-demo/rendered/opentelemetry-operator/admission-webhooks/operator-webhook-with-cert-manager.yaml @@ -0,0 +1,192 @@ +--- +# Source: opentelemetry-kube-stack/charts/opentelemetry-operator/templates/admission-webhooks/operator-webhook-with-cert-manager.yaml +apiVersion: admissionregistration.k8s.io/v1 +kind: MutatingWebhookConfiguration +metadata: + annotations: + cert-manager.io/inject-ca-from: default/example-opentelemetry-operator-serving-cert + labels: + helm.sh/chart: opentelemetry-operator-0.63.2 + app.kubernetes.io/name: opentelemetry-operator + app.kubernetes.io/version: "0.102.0" + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: example + + app.kubernetes.io/component: webhook + name: example-opentelemetry-operator-mutation +webhooks: + - admissionReviewVersions: + - v1 + clientConfig: + service: + name: example-opentelemetry-operator-webhook + namespace: default + path: /mutate-opentelemetry-io-v1alpha1-instrumentation + port: 443 + failurePolicy: Ignore + name: minstrumentation.kb.io + rules: + - apiGroups: + - opentelemetry.io + apiVersions: + - v1alpha1 + operations: + - CREATE + - UPDATE + resources: + - instrumentations + scope: Namespaced + sideEffects: None + timeoutSeconds: 10 + - admissionReviewVersions: + - v1 + clientConfig: + service: + name: example-opentelemetry-operator-webhook + namespace: default + path: /mutate-opentelemetry-io-v1beta1-opentelemetrycollector + port: 443 + failurePolicy: Ignore + name: mopentelemetrycollectorbeta.kb.io + rules: + - apiGroups: + - opentelemetry.io + apiVersions: + - v1beta1 + operations: + - CREATE + - UPDATE + resources: + - opentelemetrycollectors + scope: Namespaced + sideEffects: None + timeoutSeconds: 10 + - admissionReviewVersions: + - v1 + clientConfig: + service: + name: example-opentelemetry-operator-webhook + namespace: default + path: /mutate-v1-pod + port: 443 + failurePolicy: Ignore + name: mpod.kb.io + rules: + - apiGroups: + - "" + apiVersions: + - v1 + operations: + - CREATE + resources: + - pods + scope: Namespaced + sideEffects: None + timeoutSeconds: 10 +--- +# Source: opentelemetry-kube-stack/charts/opentelemetry-operator/templates/admission-webhooks/operator-webhook-with-cert-manager.yaml +apiVersion: admissionregistration.k8s.io/v1 +kind: ValidatingWebhookConfiguration +metadata: + annotations: + cert-manager.io/inject-ca-from: default/example-opentelemetry-operator-serving-cert + labels: + helm.sh/chart: opentelemetry-operator-0.63.2 + app.kubernetes.io/name: opentelemetry-operator + app.kubernetes.io/version: "0.102.0" + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: example + + app.kubernetes.io/component: webhook + name: example-opentelemetry-operator-validation +webhooks: + - admissionReviewVersions: + - v1 + clientConfig: + service: + name: example-opentelemetry-operator-webhook + namespace: default + path: /validate-opentelemetry-io-v1alpha1-instrumentation + port: 443 + failurePolicy: Ignore + name: vinstrumentationcreateupdate.kb.io + rules: + - apiGroups: + - opentelemetry.io + apiVersions: + - v1alpha1 + operations: + - CREATE + - UPDATE + resources: + - instrumentations + scope: Namespaced + sideEffects: None + timeoutSeconds: 10 + - admissionReviewVersions: + - v1 + clientConfig: + service: + name: example-opentelemetry-operator-webhook + namespace: default + path: /validate-opentelemetry-io-v1alpha1-instrumentation + port: 443 + failurePolicy: Ignore + name: vinstrumentationdelete.kb.io + rules: + - apiGroups: + - 
opentelemetry.io + apiVersions: + - v1alpha1 + operations: + - DELETE + resources: + - instrumentations + scope: Namespaced + sideEffects: None + timeoutSeconds: 10 + - admissionReviewVersions: + - v1 + clientConfig: + service: + name: example-opentelemetry-operator-webhook + namespace: default + path: /validate-opentelemetry-io-v1beta1-opentelemetrycollector + port: 443 + failurePolicy: Ignore + name: vopentelemetrycollectorcreateupdatebeta.kb.io + rules: + - apiGroups: + - opentelemetry.io + apiVersions: + - v1beta1 + operations: + - CREATE + - UPDATE + resources: + - opentelemetrycollectors + scope: Namespaced + sideEffects: None + timeoutSeconds: 10 + - admissionReviewVersions: + - v1 + clientConfig: + service: + name: example-opentelemetry-operator-webhook + namespace: default + path: /validate-opentelemetry-io-v1beta1-opentelemetrycollector + port: 443 + failurePolicy: Ignore + name: vopentelemetrycollectordeletebeta.kb.io + rules: + - apiGroups: + - opentelemetry.io + apiVersions: + - v1beta1 + operations: + - DELETE + resources: + - opentelemetrycollectors + scope: Namespaced + sideEffects: None + timeoutSeconds: 10 diff --git a/charts/opentelemetry-kube-stack/examples/cloud-demo/rendered/opentelemetry-operator/admission-webhooks/operator-webhook.yaml b/charts/opentelemetry-kube-stack/examples/cloud-demo/rendered/opentelemetry-operator/admission-webhooks/operator-webhook.yaml new file mode 100644 index 000000000..5b9ff5511 --- /dev/null +++ b/charts/opentelemetry-kube-stack/examples/cloud-demo/rendered/opentelemetry-operator/admission-webhooks/operator-webhook.yaml @@ -0,0 +1,3 @@ +--- +# Source: opentelemetry-kube-stack/charts/opentelemetry-operator/templates/admission-webhooks/operator-webhook.yaml +--- diff --git a/charts/opentelemetry-kube-stack/examples/cloud-demo/rendered/opentelemetry-operator/certmanager.yaml b/charts/opentelemetry-kube-stack/examples/cloud-demo/rendered/opentelemetry-operator/certmanager.yaml new file mode 100644 index 000000000..dc2bf94ca --- /dev/null +++ b/charts/opentelemetry-kube-stack/examples/cloud-demo/rendered/opentelemetry-operator/certmanager.yaml @@ -0,0 +1,43 @@ +--- +# Source: opentelemetry-kube-stack/charts/opentelemetry-operator/templates/certmanager.yaml +apiVersion: cert-manager.io/v1 +kind: Certificate +metadata: + labels: + helm.sh/chart: opentelemetry-operator-0.63.2 + app.kubernetes.io/name: opentelemetry-operator + app.kubernetes.io/version: "0.102.0" + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: example + + app.kubernetes.io/component: webhook + name: example-opentelemetry-operator-serving-cert + namespace: default +spec: + dnsNames: + - example-opentelemetry-operator-webhook.default.svc + - example-opentelemetry-operator-webhook.default.svc.cluster.local + issuerRef: + kind: Issuer + name: example-opentelemetry-operator-selfsigned-issuer + secretName: example-opentelemetry-operator-controller-manager-service-cert + subject: + organizationalUnits: + - example-opentelemetry-operator +--- +# Source: opentelemetry-kube-stack/charts/opentelemetry-operator/templates/certmanager.yaml +apiVersion: cert-manager.io/v1 +kind: Issuer +metadata: + labels: + helm.sh/chart: opentelemetry-operator-0.63.2 + app.kubernetes.io/name: opentelemetry-operator + app.kubernetes.io/version: "0.102.0" + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: example + + app.kubernetes.io/component: webhook + name: example-opentelemetry-operator-selfsigned-issuer + namespace: default +spec: + selfSigned: {} diff --git 
a/charts/opentelemetry-kube-stack/examples/cloud-demo/rendered/opentelemetry-operator/clusterrole.yaml b/charts/opentelemetry-kube-stack/examples/cloud-demo/rendered/opentelemetry-operator/clusterrole.yaml new file mode 100644 index 000000000..fc6ad7de6 --- /dev/null +++ b/charts/opentelemetry-kube-stack/examples/cloud-demo/rendered/opentelemetry-operator/clusterrole.yaml @@ -0,0 +1,265 @@ +--- +# Source: opentelemetry-kube-stack/charts/opentelemetry-operator/templates/clusterrole.yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + helm.sh/chart: opentelemetry-operator-0.63.2 + app.kubernetes.io/name: opentelemetry-operator + app.kubernetes.io/version: "0.102.0" + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: example + + app.kubernetes.io/component: controller-manager + name: example-opentelemetry-operator-manager +rules: + - apiGroups: + - "" + resources: + - configmaps + - persistentvolumeclaims + - persistentvolumes + - pods + - serviceaccounts + - services + verbs: + - create + - delete + - get + - list + - patch + - update + - watch + - apiGroups: + - "" + resources: + - events + verbs: + - create + - patch + - apiGroups: + - "" + resources: + - namespaces + verbs: + - list + - watch + - apiGroups: + - apps + resources: + - daemonsets + - deployments + - statefulsets + verbs: + - create + - delete + - get + - list + - patch + - update + - watch + - apiGroups: + - apps + - extensions + resources: + - replicasets + verbs: + - get + - list + - watch + - apiGroups: + - autoscaling + resources: + - horizontalpodautoscalers + verbs: + - create + - delete + - get + - list + - patch + - update + - watch + - apiGroups: + - batch + resources: + - jobs + verbs: + - get + - list + - watch + - apiGroups: + - config.openshift.io + resources: + - infrastructures + - infrastructures/status + verbs: + - get + - list + - watch + - apiGroups: + - coordination.k8s.io + resources: + - leases + verbs: + - create + - get + - list + - update + - apiGroups: + - monitoring.coreos.com + resources: + - podmonitors + - servicemonitors + verbs: + - create + - delete + - get + - list + - patch + - update + - watch + - apiGroups: + - networking.k8s.io + resources: + - ingresses + verbs: + - create + - delete + - get + - list + - patch + - update + - watch + - apiGroups: + - opentelemetry.io + resources: + - instrumentations + verbs: + - get + - list + - patch + - update + - watch + - apiGroups: + - opentelemetry.io + resources: + - opampbridges + verbs: + - create + - delete + - get + - list + - patch + - update + - watch + - apiGroups: + - opentelemetry.io + resources: + - opampbridges/finalizers + verbs: + - update + - apiGroups: + - opentelemetry.io + resources: + - opampbridges/status + verbs: + - get + - patch + - update + - apiGroups: + - opentelemetry.io + resources: + - opentelemetrycollectors + verbs: + - get + - list + - patch + - update + - watch + - apiGroups: + - opentelemetry.io + resources: + - opentelemetrycollectors/finalizers + verbs: + - get + - patch + - update + - apiGroups: + - opentelemetry.io + resources: + - opentelemetrycollectors/status + verbs: + - get + - patch + - update + - apiGroups: + - policy + resources: + - poddisruptionbudgets + verbs: + - create + - delete + - get + - list + - patch + - update + - watch + - apiGroups: + - route.openshift.io + resources: + - routes + - routes/custom-host + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +--- +# Source: 
opentelemetry-kube-stack/charts/opentelemetry-operator/templates/clusterrole.yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + helm.sh/chart: opentelemetry-operator-0.63.2 + app.kubernetes.io/name: opentelemetry-operator + app.kubernetes.io/version: "0.102.0" + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: example + + app.kubernetes.io/component: controller-manager + name: example-opentelemetry-operator-metrics +rules: + - nonResourceURLs: + - /metrics + verbs: + - get +--- +# Source: opentelemetry-kube-stack/charts/opentelemetry-operator/templates/clusterrole.yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + helm.sh/chart: opentelemetry-operator-0.63.2 + app.kubernetes.io/name: opentelemetry-operator + app.kubernetes.io/version: "0.102.0" + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: example + + app.kubernetes.io/component: controller-manager + name: example-opentelemetry-operator-proxy +rules: + - apiGroups: + - authentication.k8s.io + resources: + - tokenreviews + verbs: + - create + - apiGroups: + - authorization.k8s.io + resources: + - subjectaccessreviews + verbs: + - create diff --git a/charts/opentelemetry-kube-stack/examples/cloud-demo/rendered/opentelemetry-operator/clusterrolebinding.yaml b/charts/opentelemetry-kube-stack/examples/cloud-demo/rendered/opentelemetry-operator/clusterrolebinding.yaml new file mode 100644 index 000000000..e6918c520 --- /dev/null +++ b/charts/opentelemetry-kube-stack/examples/cloud-demo/rendered/opentelemetry-operator/clusterrolebinding.yaml @@ -0,0 +1,44 @@ +--- +# Source: opentelemetry-kube-stack/charts/opentelemetry-operator/templates/clusterrolebinding.yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + labels: + helm.sh/chart: opentelemetry-operator-0.63.2 + app.kubernetes.io/name: opentelemetry-operator + app.kubernetes.io/version: "0.102.0" + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: example + + app.kubernetes.io/component: controller-manager + name: example-opentelemetry-operator-manager +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: example-opentelemetry-operator-manager +subjects: + - kind: ServiceAccount + name: opentelemetry-operator + namespace: default +--- +# Source: opentelemetry-kube-stack/charts/opentelemetry-operator/templates/clusterrolebinding.yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + labels: + helm.sh/chart: opentelemetry-operator-0.63.2 + app.kubernetes.io/name: opentelemetry-operator + app.kubernetes.io/version: "0.102.0" + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: example + + app.kubernetes.io/component: controller-manager + name: example-opentelemetry-operator-proxy +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: example-opentelemetry-operator-proxy +subjects: + - kind: ServiceAccount + name: opentelemetry-operator + namespace: default diff --git a/charts/opentelemetry-kube-stack/examples/cloud-demo/rendered/opentelemetry-operator/deployment.yaml b/charts/opentelemetry-kube-stack/examples/cloud-demo/rendered/opentelemetry-operator/deployment.yaml new file mode 100644 index 000000000..6da6e7947 --- /dev/null +++ b/charts/opentelemetry-kube-stack/examples/cloud-demo/rendered/opentelemetry-operator/deployment.yaml @@ -0,0 +1,105 @@ +--- +# Source: opentelemetry-kube-stack/charts/opentelemetry-operator/templates/deployment.yaml 
+apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + helm.sh/chart: opentelemetry-operator-0.63.2 + app.kubernetes.io/name: opentelemetry-operator + app.kubernetes.io/version: "0.102.0" + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: example + + app.kubernetes.io/component: controller-manager + name: example-opentelemetry-operator + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: opentelemetry-operator + app.kubernetes.io/component: controller-manager + template: + metadata: + annotations: + kubectl.kubernetes.io/default-container: manager + labels: + app.kubernetes.io/name: opentelemetry-operator + app.kubernetes.io/component: controller-manager + spec: + hostNetwork: false + containers: + - args: + - --metrics-addr=0.0.0.0:8080 + - --enable-leader-election + - --health-probe-addr=:8081 + - --webhook-port=9443 + - --collector-image=otel/opentelemetry-collector-k8s:0.102.1 + command: + - /manager + env: + - name: ENABLE_WEBHOOKS + value: "true" + image: "ghcr.io/open-telemetry/opentelemetry-operator/opentelemetry-operator:0.102.0" + name: manager + ports: + - containerPort: 8080 + name: metrics + protocol: TCP + - containerPort: 9443 + name: webhook-server + protocol: TCP + livenessProbe: + httpGet: + path: /healthz + port: 8081 + initialDelaySeconds: 15 + periodSeconds: 20 + readinessProbe: + httpGet: + path: /readyz + port: 8081 + initialDelaySeconds: 5 + periodSeconds: 10 + resources: + limits: + cpu: 100m + memory: 128Mi + requests: + cpu: 100m + memory: 64Mi + volumeMounts: + - mountPath: /tmp/k8s-webhook-server/serving-certs + name: cert + readOnly: true + + - args: + - --secure-listen-address=0.0.0.0:8443 + - --upstream=http://127.0.0.1:8080/ + - --logtostderr=true + - --v=0 + image: "quay.io/brancz/kube-rbac-proxy:v0.15.0" + name: kube-rbac-proxy + ports: + - containerPort: 8443 + name: https + protocol: TCP + resources: + limits: + cpu: 500m + memory: 128Mi + requests: + cpu: 5m + memory: 64Mi + serviceAccountName: opentelemetry-operator + terminationGracePeriodSeconds: 10 + volumes: + - name: cert + secret: + defaultMode: 420 + secretName: example-opentelemetry-operator-controller-manager-service-cert + securityContext: + fsGroup: 65532 + runAsGroup: 65532 + runAsNonRoot: true + runAsUser: 65532 diff --git a/charts/opentelemetry-kube-stack/examples/cloud-demo/rendered/opentelemetry-operator/role.yaml b/charts/opentelemetry-kube-stack/examples/cloud-demo/rendered/opentelemetry-operator/role.yaml new file mode 100644 index 000000000..968764e98 --- /dev/null +++ b/charts/opentelemetry-kube-stack/examples/cloud-demo/rendered/opentelemetry-operator/role.yaml @@ -0,0 +1,43 @@ +--- +# Source: opentelemetry-kube-stack/charts/opentelemetry-operator/templates/role.yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + labels: + helm.sh/chart: opentelemetry-operator-0.63.2 + app.kubernetes.io/name: opentelemetry-operator + app.kubernetes.io/version: "0.102.0" + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: example + + app.kubernetes.io/component: controller-manager + name: example-opentelemetry-operator-leader-election + namespace: default +rules: + - apiGroups: + - "" + resources: + - configmaps + verbs: + - get + - list + - watch + - create + - update + - patch + - delete + - apiGroups: + - "" + resources: + - configmaps/status + verbs: + - get + - update + - patch + - apiGroups: + - "" + resources: + - events + verbs: + - create + - patch diff --git 
a/charts/opentelemetry-kube-stack/examples/cloud-demo/rendered/opentelemetry-operator/rolebinding.yaml b/charts/opentelemetry-kube-stack/examples/cloud-demo/rendered/opentelemetry-operator/rolebinding.yaml new file mode 100644 index 000000000..e153dfc5b --- /dev/null +++ b/charts/opentelemetry-kube-stack/examples/cloud-demo/rendered/opentelemetry-operator/rolebinding.yaml @@ -0,0 +1,23 @@ +--- +# Source: opentelemetry-kube-stack/charts/opentelemetry-operator/templates/rolebinding.yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + labels: + helm.sh/chart: opentelemetry-operator-0.63.2 + app.kubernetes.io/name: opentelemetry-operator + app.kubernetes.io/version: "0.102.0" + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: example + + app.kubernetes.io/component: controller-manager + name: example-opentelemetry-operator-leader-election + namespace: default +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: example-opentelemetry-operator-leader-election +subjects: + - kind: ServiceAccount + name: opentelemetry-operator + namespace: default diff --git a/charts/opentelemetry-kube-stack/examples/cloud-demo/rendered/opentelemetry-operator/service.yaml b/charts/opentelemetry-kube-stack/examples/cloud-demo/rendered/opentelemetry-operator/service.yaml new file mode 100644 index 000000000..50b51e134 --- /dev/null +++ b/charts/opentelemetry-kube-stack/examples/cloud-demo/rendered/opentelemetry-operator/service.yaml @@ -0,0 +1,51 @@ +--- +# Source: opentelemetry-kube-stack/charts/opentelemetry-operator/templates/service.yaml +apiVersion: v1 +kind: Service +metadata: + labels: + helm.sh/chart: opentelemetry-operator-0.63.2 + app.kubernetes.io/name: opentelemetry-operator + app.kubernetes.io/version: "0.102.0" + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: example + + app.kubernetes.io/component: controller-manager + name: example-opentelemetry-operator + namespace: default +spec: + ports: + - name: https + port: 8443 + protocol: TCP + targetPort: https + - name: metrics + port: 8080 + protocol: TCP + targetPort: metrics + selector: + app.kubernetes.io/name: opentelemetry-operator + app.kubernetes.io/component: controller-manager +--- +# Source: opentelemetry-kube-stack/charts/opentelemetry-operator/templates/service.yaml +apiVersion: v1 +kind: Service +metadata: + labels: + helm.sh/chart: opentelemetry-operator-0.63.2 + app.kubernetes.io/name: opentelemetry-operator + app.kubernetes.io/version: "0.102.0" + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: example + + app.kubernetes.io/component: controller-manager + name: example-opentelemetry-operator-webhook + namespace: default +spec: + ports: + - port: 443 + protocol: TCP + targetPort: webhook-server + selector: + app.kubernetes.io/name: opentelemetry-operator + app.kubernetes.io/component: controller-manager diff --git a/charts/opentelemetry-kube-stack/examples/cloud-demo/rendered/opentelemetry-operator/serviceaccount.yaml b/charts/opentelemetry-kube-stack/examples/cloud-demo/rendered/opentelemetry-operator/serviceaccount.yaml new file mode 100644 index 000000000..576f75f34 --- /dev/null +++ b/charts/opentelemetry-kube-stack/examples/cloud-demo/rendered/opentelemetry-operator/serviceaccount.yaml @@ -0,0 +1,15 @@ +--- +# Source: opentelemetry-kube-stack/charts/opentelemetry-operator/templates/serviceaccount.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: opentelemetry-operator + namespace: default + labels: + helm.sh/chart: 
opentelemetry-operator-0.63.2 + app.kubernetes.io/name: opentelemetry-operator + app.kubernetes.io/version: "0.102.0" + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: example + + app.kubernetes.io/component: controller-manager diff --git a/charts/opentelemetry-kube-stack/examples/cloud-demo/rendered/opentelemetry-operator/tests/test-certmanager-connection.yaml b/charts/opentelemetry-kube-stack/examples/cloud-demo/rendered/opentelemetry-operator/tests/test-certmanager-connection.yaml new file mode 100644 index 000000000..b95641331 --- /dev/null +++ b/charts/opentelemetry-kube-stack/examples/cloud-demo/rendered/opentelemetry-operator/tests/test-certmanager-connection.yaml @@ -0,0 +1,38 @@ +--- +# Source: opentelemetry-kube-stack/charts/opentelemetry-operator/templates/tests/test-certmanager-connection.yaml +apiVersion: v1 +kind: Pod +metadata: + name: "example-opentelemetry-operator-cert-manager" + namespace: default + labels: + helm.sh/chart: opentelemetry-operator-0.63.2 + app.kubernetes.io/name: opentelemetry-operator + app.kubernetes.io/version: "0.102.0" + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: example + + app.kubernetes.io/component: webhook + annotations: + "helm.sh/hook": test +spec: + containers: + - name: wget + image: "busybox:latest" + env: + - name: CERT_MANAGER_CLUSTERIP + value: "cert-manager-webhook" + - name: CERT_MANAGER_PORT + value: "443" + command: + - sh + - -c + # The following shell script tests if the cert-manager service is up. If the service is up, when we try + # to wget its exposed port, we will get an HTTP error 400. + - | + wget_output=$(wget -q "$CERT_MANAGER_CLUSTERIP:$CERT_MANAGER_PORT") + if wget_output=="wget: server returned error: HTTP/1.0 400 Bad Request" + then exit 0 + else exit 1 + fi + restartPolicy: Never diff --git a/charts/opentelemetry-kube-stack/examples/cloud-demo/rendered/opentelemetry-operator/tests/test-service-connection.yaml b/charts/opentelemetry-kube-stack/examples/cloud-demo/rendered/opentelemetry-operator/tests/test-service-connection.yaml new file mode 100644 index 000000000..ac8cf507a --- /dev/null +++ b/charts/opentelemetry-kube-stack/examples/cloud-demo/rendered/opentelemetry-operator/tests/test-service-connection.yaml @@ -0,0 +1,76 @@ +--- +# Source: opentelemetry-kube-stack/charts/opentelemetry-operator/templates/tests/test-service-connection.yaml +apiVersion: v1 +kind: Pod +metadata: + name: "example-opentelemetry-operator-metrics" + namespace: default + labels: + helm.sh/chart: opentelemetry-operator-0.63.2 + app.kubernetes.io/name: opentelemetry-operator + app.kubernetes.io/version: "0.102.0" + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: example + + app.kubernetes.io/component: controller-manager + annotations: + "helm.sh/hook": test +spec: + containers: + - name: wget + image: "busybox:latest" + env: + - name: MANAGER_METRICS_SERVICE_CLUSTERIP + value: "example-opentelemetry-operator" + - name: MANAGER_METRICS_SERVICE_PORT + value: "8443" + command: + - sh + - -c + # The following shell script tests if the controller-manager-metrics-service is up. + # If the service is up, when we try to wget its exposed port, we will get an HTTP error 400. 
+ - | + wget_output=$(wget -q "$MANAGER_METRICS_SERVICE_CLUSTERIP:$MANAGER_METRICS_SERVICE_PORT") + if wget_output=="wget: server returned error: HTTP/1.0 400 Bad Request" + then exit 0 + else exit 1 + fi + restartPolicy: Never +--- +# Source: opentelemetry-kube-stack/charts/opentelemetry-operator/templates/tests/test-service-connection.yaml +apiVersion: v1 +kind: Pod +metadata: + name: "example-opentelemetry-operator-webhook" + namespace: default + labels: + helm.sh/chart: opentelemetry-operator-0.63.2 + app.kubernetes.io/name: opentelemetry-operator + app.kubernetes.io/version: "0.102.0" + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: example + + app.kubernetes.io/component: controller-manager + annotations: + "helm.sh/hook": test +spec: + containers: + - name: wget + image: "busybox:latest" + env: + - name: WEBHOOK_SERVICE_CLUSTERIP + value: "example-opentelemetry-operator-webhook" + - name: WEBHOOK_SERVICE_PORT + value: "443" + command: + - sh + - -c + # The following shell script tests if the webhook service is up. If the service is up, when we try + # to wget its exposed port, we will get an HTTP error 400. + - | + wget_output=$(wget -q "$WEBHOOK_SERVICE_CLUSTERIP:$WEBHOOK_SERVICE_PORT") + if wget_output=="wget: server returned error: HTTP/1.0 400 Bad Request" + then exit 0 + else exit 1 + fi + restartPolicy: Never diff --git a/charts/opentelemetry-kube-stack/examples/cloud-demo/values.yaml b/charts/opentelemetry-kube-stack/examples/cloud-demo/values.yaml new file mode 100644 index 000000000..00ee1dedb --- /dev/null +++ b/charts/opentelemetry-kube-stack/examples/cloud-demo/values.yaml @@ -0,0 +1,58 @@ +clusterName: demo +opentelemetry-operator: + enabled: true +collectors: + daemon: + env: + - name: ACCESS_TOKEN + valueFrom: + secretKeyRef: + key: access_token + name: otel-collector-secret + config: + exporters: + otlp: + endpoint: ingest.example.com:443 + headers: + "access-token": "${ACCESS_TOKEN}" + service: + pipelines: + metrics: + exporters: [debug, otlp] + traces: + exporters: [debug, otlp] + logs: + exporters: [debug, otlp] + cluster: + env: + - name: ACCESS_TOKEN + valueFrom: + secretKeyRef: + key: access_token + name: otel-collector-secret + config: + exporters: + otlp: + endpoint: ingest.example.com:443 + headers: + "access-token": "${ACCESS_TOKEN}" + service: + pipelines: + metrics: + exporters: [debug, otlp] +instrumentation: + enabled: true + env: + - name: OTEL_K8S_NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + exporter: + endpoint: http://${OTEL_K8S_NODE_NAME}:4317 + python: + env: + - name: OTEL_EXPORTER_OTLP_ENDPOINT + value: http://${OTEL_K8S_NODE_NAME}:4318 +opAMPBridge: + enabled: true + addReportingLabel: true diff --git a/charts/opentelemetry-kube-stack/templates/_config.tpl b/charts/opentelemetry-kube-stack/templates/_config.tpl new file mode 100644 index 000000000..d43276784 --- /dev/null +++ b/charts/opentelemetry-kube-stack/templates/_config.tpl @@ -0,0 +1,345 @@ +{{/* +Constructs the final config for the given collector + +This allows a user to supply a scrape_configs_file. This file is templated and loaded as a yaml array. +If a user has already supplied a prometheus receiver config, the file's config is appended. Finally, +the config is written as YAML. +*/}} +{{- define "opentelemetry-kube-stack.config" -}} +{{- $collector := .collector }} +{{- $config := .collector.config }} +{{- if .collector.scrape_configs_file }} +{{- $config = (include "opentelemetry-kube-stack.collector.appendPrometheusScrapeFile" . 
+{{- $_ := set $collector "config" $config }}
+{{- end }}
+{{- if .collector.presets.kubernetesAttributes.enabled }}
+{{- $config = (include "opentelemetry-kube-stack.collector.applyKubernetesAttributesConfig" (dict "collector" $collector) | fromYaml) }}
+{{- $_ := set $collector "config" $config }}
+{{- end }}
+{{- if .collector.presets.logsCollection.enabled }}
+{{- $_ := set $collector "exclude" (printf "/var/log/pods/%s_%s*_*/%s/*.log" .namespace (include "opentelemetry-kube-stack.collectorFullname" .) (.Chart.Name | lower)) }}
+{{- $config = (include "opentelemetry-kube-stack.collector.applyLogsCollectionConfig" (dict "collector" $collector) | fromYaml) -}}
+{{- $_ := set $collector "config" $config }}
+{{- end }}
+{{- if .collector.presets.hostMetrics.enabled }}
+{{- $config = (include "opentelemetry-kube-stack.collector.applyHostMetricsConfig" (dict "collector" $collector) | fromYaml) -}}
+{{- $_ := set $collector "config" $config }}
+{{- end }}
+{{- if .collector.presets.kubeletMetrics.enabled }}
+{{- $config = (include "opentelemetry-kube-stack.collector.applyKubeletMetricsConfig" (dict "collector" $collector) | fromYaml) -}}
+{{- $_ := set $collector "config" $config }}
+{{- end }}
+{{- if .collector.presets.kubernetesEvents.enabled }}
+{{- $config = (include "opentelemetry-kube-stack.collector.applyKubernetesEventsConfig" (dict "collector" $collector) | fromYaml) -}}
+{{- $_ := set $collector "config" $config }}
+{{- end }}
+{{- if .collector.presets.clusterMetrics.enabled }}
+{{- $config = (include "opentelemetry-kube-stack.collector.applyClusterMetricsConfig" (dict "collector" $collector) | fromYaml) -}}
+{{- $_ := set $collector "config" $config }}
+{{- end }}
+{{- toYaml $collector.config | nindent 4 }}
+{{- end }}
+
+{{/*
+This helper allows a user to load in an external scrape configs file directly from prometheus.
+The helper will load and then append the scrape configs list to an existing prometheus scraper.
+If no prometheus configuration is present, the prometheus configuration is added.
+
+This helper ultimately assists users in getting started with Kubernetes infra metrics from scratch
+OR helps them easily port prometheus to the otel-kube-stack chart with no changes to their prometheus config.
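+
+As a sketch of the intended usage (the file name and scrape job below are hypothetical),
+a values file that sets
+
+  collectors:
+    daemon:
+      scrape_configs_file: "my_scrape_configs.yaml"
+
+where my_scrape_configs.yaml holds a plain prometheus scrape_configs list such as
+
+  - job_name: my-app
+    scrape_interval: 30s
+    static_configs:
+      - targets: ["0.0.0.0:9090"]
+
+ends up with that job appended to receivers.prometheus.config.scrape_configs, and with
+the prometheus receiver prepended to the metrics pipeline when it is not already listed.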
+*/}}
+{{- define "opentelemetry-kube-stack.collector.appendPrometheusScrapeFile" -}}
+{{- $loaded_file := (.Files.Get .collector.scrape_configs_file) }}
+{{- $loaded_config := (fromYamlArray (tpl $loaded_file .)) }}
+{{- $prom_override := (dict "receivers" (dict "prometheus" (dict "config" (dict "scrape_configs" $loaded_config)))) }}
+{{- if (dig "receivers" "prometheus" "config" "scrape_configs" false .collector.config) }}
+{{- $merged_prom_scrape_configs := (concat .collector.config.receivers.prometheus.config.scrape_configs $loaded_config) }}
+{{- $prom_override = (dict "receivers" (dict "prometheus" (dict "config" (dict "scrape_configs" $merged_prom_scrape_configs)))) }}
+{{- end }}
+{{- if and (dig "service" "pipelines" "metrics" false .collector.config) (not (has "prometheus" (dig "service" "pipelines" "metrics" "receivers" list .collector.config))) }}
+{{- $_ := set .collector.config.service.pipelines.metrics "receivers" (prepend (.collector.config.service.pipelines.metrics.receivers | default list) "prometheus" | uniq) }}
+{{- end }}
+{{- (mergeOverwrite .collector.config $prom_override) | toYaml }}
+{{- end }}
+
+{{- define "opentelemetry-kube-stack.collector.applyKubernetesAttributesConfig" -}}
+{{- $config := mustMergeOverwrite (include "opentelemetry-kube-stack.collector.kubernetesAttributesConfig" .collector | fromYaml) .collector.config }}
+{{- if and (dig "service" "pipelines" "logs" false $config) (not (has "k8sattributes" (dig "service" "pipelines" "logs" "processors" list $config))) }}
+{{- $_ := set $config.service.pipelines.logs "processors" (prepend ($config.service.pipelines.logs.processors | default list) "k8sattributes" | uniq) }}
+{{- end }}
+{{- if and (dig "service" "pipelines" "metrics" false $config) (not (has "k8sattributes" (dig "service" "pipelines" "metrics" "processors" list $config))) }}
+{{- $_ := set $config.service.pipelines.metrics "processors" (prepend ($config.service.pipelines.metrics.processors | default list) "k8sattributes" | uniq) }}
+{{- end }}
+{{- if and (dig "service" "pipelines" "traces" false $config) (not (has "k8sattributes" (dig "service" "pipelines" "traces" "processors" list $config))) }}
+{{- $_ := set $config.service.pipelines.traces "processors" (prepend ($config.service.pipelines.traces.processors | default list) "k8sattributes" | uniq) }}
+{{- end }}
+{{- $config | toYaml }}
+{{- end }}
+
+{{- define "opentelemetry-kube-stack.collector.kubernetesAttributesConfig" -}}
+processors:
+  k8sattributes:
+    {{- if eq .mode "daemonset" }}
+    filter:
+      node_from_env_var: K8S_NODE_NAME
+    {{- end }}
+    passthrough: false
+    pod_association:
+      - sources:
+          - from: resource_attribute
+            name: k8s.pod.uid
+      - sources:
+          - from: resource_attribute
+            name: k8s.pod.name
+          - from: resource_attribute
+            name: k8s.namespace.name
+          - from: resource_attribute
+            name: k8s.node.name
+      - sources:
+          - from: resource_attribute
+            name: k8s.pod.ip
+      - sources:
+          - from: resource_attribute
+            name: k8s.pod.name
+          - from: resource_attribute
+            name: k8s.namespace.name
+      - sources:
+          - from: connection
+    extract:
+      metadata:
+        - k8s.namespace.name
+        - k8s.pod.name
+        - k8s.pod.uid
+        - k8s.node.name
+        - k8s.pod.start_time
+        - k8s.deployment.name
+        - k8s.replicaset.name
+        - k8s.replicaset.uid
+        - k8s.daemonset.name
+        - k8s.daemonset.uid
+        - k8s.job.name
+        - k8s.job.uid
+        - k8s.container.name
+        - k8s.cronjob.name
+        - k8s.statefulset.name
+        - k8s.statefulset.uid
+        - container.image.tag
+        - container.image.name
+        - k8s.cluster.uid
+      labels:
+        - tag_name: service.name
+          key: app.kubernetes.io/name
+          from: pod
+        - tag_name: service.name
+          key: k8s-app
+          from: pod
+        - tag_name: k8s.app.instance
+          key: app.kubernetes.io/instance
+          from: pod
+        - tag_name: service.version
+          key: app.kubernetes.io/version
+          from: pod
+        - tag_name: k8s.app.component
+          key: app.kubernetes.io/component
+          from: pod
+        {{- if .presets.kubernetesAttributes.extractAllPodLabels }}
+        - tag_name: $$1
+          key_regex: (.*)
+          from: pod
+        {{- end }}
+      {{- if .presets.kubernetesAttributes.extractAllPodAnnotations }}
+      annotations:
+        - tag_name: $$1
+          key_regex: (.*)
+          from: pod
+      {{- end }}
+{{- end }}
+
+{{- define "opentelemetry-kube-stack.collector.applyHostMetricsConfig" -}}
+{{- $config := mustMergeOverwrite (include "opentelemetry-kube-stack.collector.hostMetricsConfig" .collector | fromYaml) .collector.config }}
+{{- if and (dig "service" "pipelines" "metrics" false $config) (not (has "hostmetrics" (dig "service" "pipelines" "metrics" "receivers" list $config))) }}
+{{- $_ := set $config.service.pipelines.metrics "receivers" (append ($config.service.pipelines.metrics.receivers | default list) "hostmetrics" | uniq) }}
+{{- end }}
+{{- $config | toYaml }}
+{{- end }}
+
+{{- define "opentelemetry-kube-stack.collector.hostMetricsConfig" -}}
+receivers:
+  hostmetrics:
+    root_path: /hostfs
+    collection_interval: 10s
+    scrapers:
+      cpu:
+        metrics:
+          system.cpu.utilization:
+            enabled: true
+      load: {}
+      memory:
+        metrics:
+          system.memory.utilization:
+            enabled: true
+      disk: {}
+      filesystem:
+        metrics:
+          system.filesystem.utilization:
+            enabled: true
+        exclude_mount_points:
+          mount_points:
+            - /dev/*
+            - /proc/*
+            - /sys/*
+            - /run/k3s/containerd/*
+            - /var/lib/docker/*
+            - /var/lib/kubelet/*
+            - /snap/*
+          match_type: regexp
+        exclude_fs_types:
+          fs_types:
+            - autofs
+            - binfmt_misc
+            - bpf
+            - cgroup2
+            - configfs
+            - debugfs
+            - devpts
+            - devtmpfs
+            - fusectl
+            - hugetlbfs
+            - iso9660
+            - mqueue
+            - nsfs
+            - overlay
+            - proc
+            - procfs
+            - pstore
+            - rpc_pipefs
+            - securityfs
+            - selinuxfs
+            - squashfs
+            - sysfs
+            - tracefs
+          match_type: strict
+      network: {}
+{{- end }}
+
+{{- define "opentelemetry-kube-stack.collector.applyClusterMetricsConfig" -}}
+{{- $config := mustMergeOverwrite (include "opentelemetry-kube-stack.collector.clusterMetricsConfig" .collector | fromYaml) .collector.config }}
+{{- if and (dig "service" "pipelines" "metrics" false $config) (not (has "k8s_cluster" (dig "service" "pipelines" "metrics" "receivers" list $config))) }}
+{{- $_ := set $config.service.pipelines.metrics "receivers" (append ($config.service.pipelines.metrics.receivers | default list) "k8s_cluster" | uniq) }}
+{{- end }}
+{{- $config | toYaml }}
+{{- end }}
+
+{{- define "opentelemetry-kube-stack.collector.clusterMetricsConfig" -}}
+receivers:
+  k8s_cluster:
+    collection_interval: 10s
+    auth_type: serviceAccount
+    node_conditions_to_report: [Ready, MemoryPressure, DiskPressure, NetworkUnavailable]
+    allocatable_types_to_report: [cpu, memory, storage]
+{{- end }}
+
+{{- define "opentelemetry-kube-stack.collector.applyKubeletMetricsConfig" -}}
+{{- $config := mustMergeOverwrite (include "opentelemetry-kube-stack.collector.kubeletMetricsConfig" .collector | fromYaml) .collector.config }}
+{{- if and (dig "service" "pipelines" "metrics" false $config) (not (has "kubeletstats" (dig "service" "pipelines" "metrics" "receivers" list $config))) }}
+{{- $_ := set $config.service.pipelines.metrics "receivers" (append ($config.service.pipelines.metrics.receivers | default list) "kubeletstats" | uniq) }}
+{{- end }}
+{{- $config | toYaml }}
+{{- end }}
+
+{{- define "opentelemetry-kube-stack.collector.kubeletMetricsConfig" -}}
+receivers:
+  kubeletstats:
+    collection_interval: "15s"
+    auth_type: "serviceAccount"
+    insecure_skip_verify: true
+    # For this scrape to work, the RBAC must have `nodes/stats` GET access.
+    endpoint: "https://${env:OTEL_K8S_NODE_IP}:10250"
+    extra_metadata_labels:
+      - container.id
+      - k8s.volume.type
+    metric_groups:
+      - node
+      - pod
+      - volume
+      - container
+    k8s_api_config:
+      auth_type: serviceAccount
+    metrics:
+      # k8s.pod.cpu.utilization is being deprecated
+      k8s.pod.cpu.usage:
+        enabled: true
+      container.cpu.usage:
+        enabled: true
+      k8s.node.cpu.usage:
+        enabled: true
+      k8s.node.uptime:
+        enabled: true
+      k8s.pod.uptime:
+        enabled: true
+{{- end }}
+
+{{- define "opentelemetry-kube-stack.collector.applyLogsCollectionConfig" -}}
+{{- $config := mustMergeOverwrite (include "opentelemetry-kube-stack.collector.logsCollectionConfig" .collector | fromYaml) .collector.config }}
+{{- if and (dig "service" "pipelines" "logs" false $config) (not (has "filelog" (dig "service" "pipelines" "logs" "receivers" list $config))) }}
+{{- $_ := set $config.service.pipelines.logs "receivers" (append ($config.service.pipelines.logs.receivers | default list) "filelog" | uniq) }}
+{{- end }}
+{{- if .collector.presets.logsCollection.storeCheckpoints }}
+{{- $_ := set $config.service "extensions" (append ($config.service.extensions | default list) "file_storage" | uniq) }}
+{{- end }}
+{{- $config | toYaml }}
+{{- end }}
+
+{{- define "opentelemetry-kube-stack.collector.logsCollectionConfig" -}}
+{{- if .presets.logsCollection.storeCheckpoints }}
+extensions:
+  file_storage:
+    directory: /var/lib/otelcol
+{{- end }}
+receivers:
+  filelog:
+    include:
+      - /var/log/pods/*/*/*.log
+    {{- if .presets.logsCollection.includeCollectorLogs }}
+    exclude: []
+    {{- else }}
+    # Exclude collector container's logs. The file format is /var/log/pods/<namespace_name>_<pod_name>_<pod_uid>/<container_name>/*.log
+    exclude: {{ .exclude }}
+    {{- end }}
+    start_at: end
+    retry_on_failure:
+      enabled: true
+    {{- if .presets.logsCollection.storeCheckpoints }}
+    storage: file_storage
+    {{- end }}
+    include_file_path: true
+    include_file_name: false
+    operators:
+      # parse container logs
+      - type: container
+        id: container-parser
+        max_log_size: {{ .presets.logsCollection.maxRecombineLogSize }}
+{{- end }}
+
+{{- define "opentelemetry-kube-stack.collector.applyKubernetesEventsConfig" -}}
+{{- $config := mustMergeOverwrite (include "opentelemetry-kube-stack.collector.kubernetesEventsConfig" .collector | fromYaml) .collector.config }}
+{{- if and (dig "service" "pipelines" "logs" false $config) (not (has "k8sobjects" (dig "service" "pipelines" "logs" "receivers" list $config))) }}
+{{- $_ := set $config.service.pipelines.logs "receivers" (append ($config.service.pipelines.logs.receivers | default list) "k8sobjects" | uniq) }}
+{{- end }}
+{{- $config | toYaml }}
+{{- end }}
+
+{{- define "opentelemetry-kube-stack.collector.kubernetesEventsConfig" -}}
+receivers:
+  k8sobjects:
+    objects:
+      - name: events
+        mode: "watch"
+        group: "events.k8s.io"
+        exclude_watch_type:
+          - "DELETED"
+{{- end }}
diff --git a/charts/opentelemetry-kube-stack/templates/_helpers.tpl b/charts/opentelemetry-kube-stack/templates/_helpers.tpl
index ba0c09951..f0b5d7273 100644
--- a/charts/opentelemetry-kube-stack/templates/_helpers.tpl
+++ b/charts/opentelemetry-kube-stack/templates/_helpers.tpl
@@ -46,11 +46,11 @@ Allow the release namespace to be overridden
 Print a map of key values in a YAML block.
 This is useful for labels and annotations.
 */}}
 {{- define "opentelemetry-kube-stack.renderkv" -}}
-{{- with . }}
-{{- range $key, $value := . }}
+{{- with . -}}
+{{- range $key, $value := . -}}
 {{- printf "%s: %s" $key $value }}
-{{- end }}
-{{- end }}
+{{- end -}}
+{{- end -}}
 {{- end }}
 
 {{/*
@@ -58,16 +58,30 @@ Render a deduped list of environment variables and 'extraEnvs'
 */}}
 {{- define "opentelemetry-kube-stack.renderenvs" -}}
 {{- $envMap := dict }}
+{{- $valueFromMap := dict }}
 {{- range $item := .extraEnvs }}
+{{- if $item.value }}
 {{- $_ := set $envMap $item.name $item.value }}
+{{- else }}
+{{- $_ := set $valueFromMap $item.name $item.valueFrom }}
+{{- end }}
 {{- end }}
 {{- range $item := .env }}
+{{- if $item.value }}
 {{- $_ := set $envMap $item.name $item.value }}
+{{- else }}
+{{- $_ := set $valueFromMap $item.name $item.valueFrom }}
+{{- end }}
 {{- end }}
 {{- range $key, $value := $envMap }}
 - name: {{ $key }}
   value: {{ $value }}
 {{- end }}
+{{- range $key, $value := $valueFromMap }}
+- name: {{ $key }}
+  valueFrom:
+    {{- $value | toYaml | nindent 4 }}
+{{- end }}
 {{- end }}
 
 {{/*
@@ -109,13 +123,6 @@ app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
 app.kubernetes.io/managed-by: {{ .Release.Service }}
 {{- end }}
 
-{{/*
-Expand the name of the chart.
-*/}}
-{{- define "opentelemetry-kube-stack.collectorName" -}}
-{{- default .Chart.Name .collector.name | trunc 63 | trimSuffix "-" }}
-{{- end }}
-
 {{/*
 Create a default fully qualified app name.
 We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
@@ -124,12 +131,14 @@ If release name contains chart name it will be used as a full name.
 {{- define "opentelemetry-kube-stack.collectorFullname" -}}
 {{- if .fullnameOverride }}
 {{- .fullnameOverride | trunc 63 | trimSuffix "-" }}
+{{- else if .collector.fullnameOverride }}
+{{- .collector.fullnameOverride | trunc 63 | trimSuffix "-" }}
 {{- else }}
-{{- $name := default .Chart.Name (coalesce .collector.name "") }}
-{{- if contains $name .Release.Name }}
+{{- $suffix := default .Chart.Name (coalesce .collector.suffix "") }}
+{{- if contains $suffix .Release.Name }}
 {{- .Release.Name | trunc 63 | trimSuffix "-" }}
 {{- else }}
-{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
+{{- printf "%s-%s" .Release.Name $suffix | trunc 63 | trimSuffix "-" }}
 {{- end }}
 {{- end }}
 {{- end }}
@@ -149,24 +158,85 @@ Create the name of the clusterRoleBinding to use
 {{- end }}
 
 {{/*
-Constructs the final config for the given collector
-
-This allows a user to supply a scrape_configs_file. This file is templated and loaded as a yaml array.
-If a user has already supplied a prometheus receiver config, the file's config is appended. Finally,
-the config is written as YAML.
-*/}}
-{{- define "opentelemetry-kube-stack.config" -}}
-{{- if .collector.scrape_configs_file }}
-{{- $loaded_file := (.Files.Get .collector.scrape_configs_file) }}
-{{- $loaded_config := (fromYamlArray (tpl $loaded_file .)) }}
-{{- $prom_override := (dict "receivers" (dict "prometheus" (dict "config" (dict "scrape_configs" $loaded_config)))) }}
-{{- if (dig "receivers" "prometheus" "config" "scrape_configs" false .collector.config) }}
-{{- $merged_prom_scrape_configs := (concat .collector.config.receivers.prometheus.config.scrape_configs $loaded_config) }}
-{{- $prom_override = (dict "receivers" (dict "prometheus" (dict "config" (dict "scrape_configs" $merged_prom_scrape_configs)))) }}
-{{- end }}
-{{- $new_config := (mergeOverwrite .collector.config $prom_override)}}
-{{- toYaml $new_config | nindent 4 }}
-{{- else }}
-{{- toYaml .collector.config | nindent 4 }}
+Optionally include the RBAC for the k8sCluster receiver
+*/}}
+{{- define "opentelemetry-kube-stack.k8scluster.rules" -}}
+{{- if $.Values.clusterRole.rules }}
+{{ toYaml $.Values.clusterRole.rules }}
+{{- end }}
+{{- $clusterMetricsEnabled := false }}
+{{- $eventsEnabled := false }}
+{{ range $_, $collector := $.Values.collectors -}}
+{{- $clusterMetricsEnabled = (any $clusterMetricsEnabled (dig "config" "receivers" "k8s_cluster" false $collector)) }}
+{{- if (dig "presets" "clusterMetrics" "enabled" false $collector) }}
+{{- $clusterMetricsEnabled = true }}
+{{- end }}
+{{- $eventsEnabled = (any $eventsEnabled (dig "config" "receivers" "k8sobjects" false $collector)) }}
+{{- if (dig "presets" "kubernetesEvents" "enabled" false $collector) }}
+{{- $eventsEnabled = true }}
+{{- end }}
+{{- end }}
+{{- if $clusterMetricsEnabled }}
+- apiGroups:
+  - ""
+  resources:
+  - events
+  - namespaces
+  - namespaces/status
+  - nodes
+  - nodes/spec
+  - pods
+  - pods/status
+  - replicationcontrollers
+  - replicationcontrollers/status
+  - resourcequotas
+  - services
+  verbs:
+  - get
+  - list
+  - watch
+- apiGroups:
+  - apps
+  resources:
+  - daemonsets
+  - deployments
+  - replicasets
+  - statefulsets
+  verbs:
+  - get
+  - list
+  - watch
+- apiGroups:
+  - extensions
+  resources:
+  - daemonsets
+  - deployments
+  - replicasets
+  verbs:
+  - get
+  - list
+  - watch
+- apiGroups:
+  - batch
+  resources:
+  - jobs
+  - cronjobs
+  verbs:
+  - get
+  - list
+  - watch
+- apiGroups:
+  - autoscaling
+  resources:
+  - horizontalpodautoscalers
+  verbs:
+  - get
+  - list
+  - watch
+{{- end }}
+{{- if $eventsEnabled }}
+- apiGroups: ["events.k8s.io"]
+  resources: ["events"]
+  verbs: ["watch", "list"]
 {{- end }}
 {{- end }}
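For orientation, a minimal sketch of values that cause the helper above to emit the
k8s_cluster RBAC rules (the collector name here is illustrative):

  collectors:
    cluster:
      enabled: true
      presets:
        clusterMetrics:
          enabled: true

The same rules are emitted when a collector declares a k8s_cluster receiver directly in
its config, and the events rules follow analogously from the kubernetesEvents preset or
a k8sobjects receiver.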
diff --git a/charts/opentelemetry-kube-stack/templates/bridge.yaml b/charts/opentelemetry-kube-stack/templates/bridge.yaml
index 264cc6383..db3da520d 100644
--- a/charts/opentelemetry-kube-stack/templates/bridge.yaml
+++ b/charts/opentelemetry-kube-stack/templates/bridge.yaml
@@ -7,10 +7,12 @@ metadata:
   name: {{ include "opentelemetry-opamp-bridge.fullname" $merged }}
   labels:
     {{- include "opentelemetry-kube-stack.labels" $ | nindent 4 }}
-    {{- include "opentelemetry-kube-stack.renderkv" .Values.opAMPBridge.labels | nindent 4 }}
+    {{- include "opentelemetry-kube-stack.renderkv" .Values.opAMPBridge.labels | indent 4 }}
+  annotations:
+    "helm.sh/hook": post-install,post-upgrade
+    "helm.sh/hook-delete-policy": hook-failed
   {{- with .Values.opAMPBridge.annotations }}
-  annotations:
-    {{- include "opentelemetry-kube-stack.renderkv" . | nindent 4 }}
+    {{- include "opentelemetry-kube-stack.renderkv" . | nindent 4 }}
   {{- end }}
 spec:
   endpoint: {{ required "opamp endpoint required" $.Values.opAMPBridge.endpoint }}
@@ -119,7 +121,7 @@ spec:
         fieldPath: status.podIP
   - name: OTEL_RESOURCE_ATTRIBUTES
     value: "k8s.cluster.name={{ $.Values.clusterName }}"
-  {{- include "opentelemetry-kube-stack.renderenvs" (dict "extraEnvs" $.Values.extraEnvs "env" .Values.opAMPBridge.env) | nindent 4 }}
+  {{- include "opentelemetry-kube-stack.renderenvs" (dict "extraEnvs" $.Values.extraEnvs "env" .Values.opAMPBridge.env) | nindent 2 }}
   {{- with $.Values.opAMPBridge.envFrom }}
   envFrom:
     {{- toYaml . | nindent 4 }}
diff --git a/charts/opentelemetry-kube-stack/templates/clusterrole.yaml b/charts/opentelemetry-kube-stack/templates/clusterrole.yaml
index 95f61f63e..1963495ac 100644
--- a/charts/opentelemetry-kube-stack/templates/clusterrole.yaml
+++ b/charts/opentelemetry-kube-stack/templates/clusterrole.yaml
@@ -47,70 +47,42 @@ rules:
   verbs: ["get", "list", "watch"]
 - nonResourceURLs: ["/metrics", "/metrics/cadvisor"]
   verbs: ["get"]
-{{- if $.Values.clusterRole.rules }}
-{{ toYaml $.Values.clusterRole.rules }}
+{{- include "opentelemetry-kube-stack.k8scluster.rules" . }}
 {{- end }}
-{{- $should_create := false }}
 {{ range $_, $collector := $.Values.collectors -}}
-{{- $should_create = (any $should_create (dig "config" "receivers" "k8s_cluster" false $collector)) }}
+{{- if $collector.enabled -}}
+{{- $collector := (mergeOverwrite (deepCopy $.Values.defaultCRConfig) $collector) }}
+{{- $merged := (dict "Template" $.Template "Files" $.Files "Chart" $.Chart "clusterRole" $.Values.clusterRole "collector" $collector "Release" $.Release "fullnameOverride" $.Values.fullnameOverride "presets" $.Values.presets) }}
+{{- $fullname := (include "opentelemetry-kube-stack.collectorFullname" $merged) }}
+{{- if and $collector.enabled $collector.clusterRoleBinding.enabled }}
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  name: {{ $fullname }}
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: {{ include "opentelemetry-kube-stack.clusterRoleName" $ }}
+subjects:
+- kind: ServiceAccount
+  # quirk of the Operator
+{{- if $collector.serviceAccount }}
+  name: "{{ $collector.serviceAccount }}"
+{{- else }}
+  name: "{{ $fullname }}-collector"
+{{- end }}
+  namespace: {{ $.Release.Namespace }}
+{{- if eq $collector.mode "statefulset" }}
+- kind: ServiceAccount
+{{- if $collector.targetAllocator.serviceAccount }}
+  name: "{{ $collector.targetAllocator.serviceAccount }}"
+{{- else }}
+  name: {{ $fullname }}-targetallocator
+{{- end }}
+  namespace: {{ $.Release.Namespace }}
+{{- end }}
+{{- end }}
 {{- end }}
-{{- if $should_create }}
-- apiGroups:
-  - ""
-  resources:
-  - events
-  - namespaces
-  - namespaces/status
-  - nodes
-  - nodes/spec
-  - pods
-  - pods/status
-  - replicationcontrollers
-  - replicationcontrollers/status
-  - resourcequotas
-  - services
-  verbs:
-  - get
-  - list
-  - watch
-- apiGroups:
-  - apps
-  resources:
-  - daemonsets
-  - deployments
-  - replicasets
-  - statefulsets
-  verbs:
-  - get
-  - list
-  - watch
-- apiGroups:
-  - extensions
-  resources:
-  - daemonsets
-  - deployments
-  - replicasets
-  verbs:
-  - get
-  - list
-  - watch
-- apiGroups:
-  - batch
-  resources:
-  - jobs
-  - cronjobs
-  verbs:
-  - get
-  - list
-  - watch
-- apiGroups:
-  - autoscaling
-  resources:
-  - horizontalpodautoscalers
-  verbs:
-  - get
-  - list
-  - watch
 {{- end }}
 {{- end }}
 {{- if and $.Values.opAMPBridge.enabled $.Values.opAMPBridge.clusterRole.enabled }}
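As a sketch of the output (all names are illustrative; the real ones derive from the
release name and the clusterRoleName helper), the block above renders a binding like:

  apiVersion: rbac.authorization.k8s.io/v1
  kind: ClusterRoleBinding
  metadata:
    name: example-daemon
  roleRef:
    apiGroup: rbac.authorization.k8s.io
    kind: ClusterRole
    name: example-collector
  subjects:
    - kind: ServiceAccount
      name: example-daemon-collector
      namespace: default

The "-collector" suffix on the service account name matches the operator-generated
account, per the "quirk of the Operator" comment in the template.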
diff --git a/charts/opentelemetry-kube-stack/templates/collector.yaml b/charts/opentelemetry-kube-stack/templates/collector.yaml
index d19d36702..df280db70 100644
--- a/charts/opentelemetry-kube-stack/templates/collector.yaml
+++ b/charts/opentelemetry-kube-stack/templates/collector.yaml
@@ -1,28 +1,32 @@
 {{ range $_, $collector := $.Values.collectors -}}
 {{- if $collector.enabled -}}
 {{- $collector := (mergeOverwrite (deepCopy $.Values.defaultCRConfig) $collector) }}
-{{- $merged := (dict "Template" $.Template "Files" $.Files "Chart" $.Chart "clusterRole" $.Values.clusterRole "collector" $collector "Release" $.Release "fullnameOverride" $.Values.fullnameOverride) }}
+{{- $merged := (dict "Template" $.Template "Files" $.Files "Chart" $.Chart "clusterRole" $.Values.clusterRole "collector" $collector "Release" $.Release "fullnameOverride" $.Values.fullnameOverride "presets" $.Values.presets "namespace" (include "opentelemetry-kube-stack.namespace" $)) }}
 {{- $fullname := (include "opentelemetry-kube-stack.collectorFullname" $merged) }}
 ---
-apiVersion: opentelemetry.io/v1alpha1
+apiVersion: opentelemetry.io/v1beta1
 kind: OpenTelemetryCollector
 metadata:
   name: {{ $fullname }}
   namespace: {{ include "opentelemetry-kube-stack.namespace" $ }}
   labels:
     {{- include "opentelemetry-kube-stack.labels" $ | nindent 4 }}
-    {{- include "opentelemetry-kube-stack.renderkv" $collector.labels | nindent 4 }}
-    {{- include "opentelemetry-kube-stack.collectorOpAMPLabels" $.Values }}
-  {{- with $collector.annotations }}
+    {{- include "opentelemetry-kube-stack.renderkv" $collector.labels | indent 4 }}
+    {{- include "opentelemetry-kube-stack.collectorOpAMPLabels" $.Values | indent 4 }}
   annotations:
+    "helm.sh/hook": post-install,post-upgrade
+    "helm.sh/hook-delete-policy": hook-failed
+    {{- with $collector.annotations }}
     {{- include "opentelemetry-kube-stack.renderkv" . | nindent 4 }}
-  {{- end }}
+    {{- end }}
 spec:
   managementState: {{ $collector.managementState }}
   mode: {{ $collector.mode }}
-  config: |
-    {{- include "opentelemetry-collector.config" $merged }}
+  config:
+    {{- include "opentelemetry-kube-stack.config" $merged }}
+  {{- if (not (eq $collector.mode "daemonset" )) }}
   replicas: {{ $collector.replicas }}
+  {{- end }}
   {{- if $collector.serviceAccount }}
   serviceAccount: {{ $collector.serviceAccount }}
   {{- end }}
@@ -59,9 +63,12 @@ spec:
   podDisruptionBudget:
     {{- toYaml . | nindent 4}}
   {{- end }}
-  {{- with $collector.securityContext }}
   securityContext:
-    {{- toYaml . | nindent 4}}
+    {{- if and (not ($collector.securityContext)) ($collector.presets.logsCollection.storeCheckpoints) }}
+    runAsUser: 0
+    runAsGroup: 0
+    {{- else -}}
+    {{- toYaml $collector.securityContext | nindent 4 }}
   {{- end }}
   {{- with $collector.podSecurityContext }}
   podSecurityContext:
@@ -103,8 +110,26 @@ spec:
   deploymentUpdateStrategy:
     {{- toYaml . | nindent 4}}
   {{- end }}
-  {{- with $collector.volumeMounts }}
   volumeMounts:
+    {{- if $collector.presets.logsCollection.enabled }}
+    - name: varlogpods
+      mountPath: /var/log/pods
+      readOnly: true
+    - name: varlibdockercontainers
+      mountPath: /var/lib/docker/containers
+      readOnly: true
+    {{- if $collector.presets.logsCollection.storeCheckpoints }}
+    - name: varlibotelcol
+      mountPath: /var/lib/otelcol
+    {{- end }}
+    {{- end }}
+    {{- if $collector.presets.hostMetrics.enabled }}
+    - name: hostfs
+      mountPath: /hostfs
+      readOnly: true
+      mountPropagation: HostToContainer
+    {{- end }}
+    {{- with $collector.volumeMounts }}
     {{- toYaml . | nindent 4 }}
   {{- end }}
   {{- with $collector.ports }}
@@ -139,7 +164,7 @@ spec:
   - name: OTEL_RESOURCE_ATTRIBUTES
     value: "k8s.cluster.name={{ $.Values.clusterName }}"
   {{- end }}
-  {{- include "opentelemetry-kube-stack.renderenvs" (dict "extraEnvs" $.Values.extraEnvs "env" $collector.env) | nindent 4 }}
+  {{- include "opentelemetry-kube-stack.renderenvs" (dict "extraEnvs" $.Values.extraEnvs "env" $collector.env) | nindent 2 }}
   {{- with $collector.envFrom }}
   envFrom:
     {{- toYaml . | nindent 4 }}
@@ -152,8 +177,27 @@ spec:
   tolerations:
     {{- toYaml . | nindent 4 }}
   {{- end }}
-  {{- with $collector.volumes }}
   volumes:
+    {{- if $collector.presets.logsCollection.enabled }}
+    - name: varlogpods
+      hostPath:
+        path: /var/log/pods
+    {{- if $collector.presets.logsCollection.storeCheckpoints }}
+    - name: varlibotelcol
+      hostPath:
+        path: /var/lib/otelcol
+        type: DirectoryOrCreate
+    {{- end }}
+    - name: varlibdockercontainers
+      hostPath:
+        path: /var/lib/docker/containers
+    {{- end }}
+    {{- if $collector.presets.hostMetrics.enabled }}
+    - name: hostfs
+      hostPath:
+        path: /
+    {{- end }}
+    {{- with $collector.volumes }}
     {{- toYaml . | nindent 4 }}
   {{- end }}
   {{- with $collector.initContainers }}
diff --git a/charts/opentelemetry-kube-stack/templates/instrumentation.yaml b/charts/opentelemetry-kube-stack/templates/instrumentation.yaml
index cd3adbc24..6bc9f5c32 100644
--- a/charts/opentelemetry-kube-stack/templates/instrumentation.yaml
+++ b/charts/opentelemetry-kube-stack/templates/instrumentation.yaml
@@ -6,11 +6,13 @@ metadata:
   name: {{ include "opentelemetry-kube-stack.instrumentation" . }}
   labels:
     {{- include "opentelemetry-kube-stack.labels" $ | nindent 4 }}
-    {{- include "opentelemetry-kube-stack.renderkv" .Values.instrumentation.labels | nindent 4 }}
-  {{- with .Values.instrumentation.annotations }}
+    {{- include "opentelemetry-kube-stack.renderkv" .Values.instrumentation.labels | indent 4 }}
   annotations:
-    {{- include "opentelemetry-kube-stack.renderkv" . | nindent 4 }}
-  {{- end }}
+    "helm.sh/hook": post-install,post-upgrade
+    "helm.sh/hook-delete-policy": hook-failed
+    {{- with .Values.instrumentation.annotations }}
+    {{- include "opentelemetry-kube-stack.renderkv" . | indent 4 }}
+    {{- end }}
 spec:
   exporter:
     endpoint: {{ .Values.instrumentation.exporter.endpoint }}
@@ -21,7 +23,7 @@ spec:
     {{- toYaml . | nindent 4 }}
   {{- end }}
   env:
-    {{- include "opentelemetry-kube-stack.renderenvs" (dict "extraEnvs" $.Values.extraEnvs "env" .Values.instrumentation.env) | nindent 4 }}
+    {{- include "opentelemetry-kube-stack.renderenvs" (dict "extraEnvs" $.Values.extraEnvs "env" .Values.instrumentation.env) | indent 4 }}
   {{- with .Values.instrumentation.resource }}
   resource:
     {{- toYaml . | nindent 4 }}
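For reference, workloads opt in to the Instrumentation resource rendered above through
the operator's inject annotations; a minimal sketch (pod metadata is illustrative):

  apiVersion: v1
  kind: Pod
  metadata:
    annotations:
      instrumentation.opentelemetry.io/inject-python: "true"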
diff --git a/charts/opentelemetry-kube-stack/values.schema.json b/charts/opentelemetry-kube-stack/values.schema.json
index 6d5df0084..12d8a7ba3 100644
--- a/charts/opentelemetry-kube-stack/values.schema.json
+++ b/charts/opentelemetry-kube-stack/values.schema.json
@@ -1279,6 +1279,92 @@
     },
     "OpenTelemetryCollectorSpec": {
       "properties": {
+        "presets": {
+          "type": "object",
+          "additionalProperties": false,
+          "properties": {
+            "logsCollection": {
+              "type": "object",
+              "additionalProperties": false,
+              "properties": {
+                "enabled": {
+                  "description": "Specifies whether the collector should collect logs.",
+                  "type": "boolean"
+                },
+                "includeCollectorLogs": {
+                  "description": "Specifies whether the collector should collect its own logs.",
+                  "type": "boolean"
+                },
+                "storeCheckpoints": {
+                  "description": "Specifies whether logs checkpoints should be stored in the /var/lib/otelcol/ host directory.",
+                  "type": "boolean"
+                },
+                "maxRecombineLogSize": {
+                  "description": "Specifies the max recombine log size.",
+                  "type": "integer"
+                }
+              }
+            },
+            "hostMetrics": {
+              "type": "object",
+              "additionalProperties": false,
+              "properties": {
+                "enabled": {
+                  "description": "Specifies whether the collector should collect host metrics.",
+                  "type": "boolean"
+                }
+              }
+            },
+            "kubeletMetrics": {
+              "type": "object",
+              "additionalProperties": false,
+              "properties": {
+                "enabled": {
+                  "description": "Specifies whether the collector should collect kubelet metrics.",
+                  "type": "boolean"
+                }
+              }
+            },
+            "kubernetesAttributes": {
+              "type": "object",
+              "additionalProperties": false,
+              "properties": {
+                "enabled": {
+                  "description": "Specifies whether the collector should add Kubernetes metadata to resource attributes.",
+                  "type": "boolean"
+                },
+                "extractAllPodLabels": {
+                  "description": "Specifies whether the k8sattributes processor should extract all pod labels.",
+                  "type": "boolean"
+                },
+                "extractAllPodAnnotations": {
+                  "description": "Specifies whether the k8sattributes processor should extract all pod annotations.",
+                  "type": "boolean"
+                }
+              }
+            },
+            "kubernetesEvents": {
+              "type": "object",
+              "additionalProperties": false,
+              "properties": {
+                "enabled": {
+                  "description": "Specifies whether the collector should collect Kubernetes objects.",
+                  "type": "boolean"
+                }
+              }
+            },
+            "clusterMetrics": {
+              "type": "object",
+              "additionalProperties": false,
+              "properties": {
+                "enabled": {
+                  "description": "Specifies whether the collector should collect cluster metrics.",
+                  "type": "boolean"
+                }
+              }
+            }
+          }
+        },
         "enabled": {
          "type": "boolean"
        },
@@ -1294,7 +1380,10 @@
             }
           }
         },
-        "name": {
+        "suffix": {
+          "type": "string"
+        },
+        "fullnameOverride": {
           "type": "string"
         },
         "annotations": {
diff --git a/charts/opentelemetry-kube-stack/values.yaml b/charts/opentelemetry-kube-stack/values.yaml
index 57bf4c23e..8c70c23fa 100644
--- a/charts/opentelemetry-kube-stack/values.yaml
+++ b/charts/opentelemetry-kube-stack/values.yaml
@@ -19,7 +19,9 @@ opentelemetry-operator:
   # Field indicating whether the operator is enabled or not
   # This is disabled for now while the chart is under development
   enabled: false
-
+  manager:
+    collectorImage:
+      repository: otel/opentelemetry-collector-k8s
   # Sub-field for admission webhooks configuration
   admissionWebhooks:
     # Policy for handling failures
@@ -40,8 +42,11 @@ opentelemetry-operator:
 defaultCRConfig:
   enabled: false
 
-  # Name of the collector
-  name: "collector"
+  # Suffix for the collector pool; by default the release name is prepended
+  suffix: "collector"
+
+  # fullnameOverride allows overriding the collector's name
+  fullnameOverride: ""
 
   # Annotations for the collector
   annotations: {}
@@ -51,6 +56,12 @@ defaultCRConfig:
   labels: {}
   #  app: otc
 
+  # scrape_configs_file allows the user to load an external file into
+  # the collector's prometheus scrape_configs. This is added to assist users
+  # coming from the prometheus ecosystem by allowing users to simply copy and paste
+  # directly from prometheus into this file to use the same config.
+  scrape_configs_file: ""
+
   # Management state of the collector
   managementState: managed
 
@@ -71,10 +82,10 @@ defaultCRConfig:
   # Image details for the collector
   image:
     # If you want to use the core image `otel/opentelemetry-collector`, you also need to change `command.name` value to `otelcol`.
-    repository: otel/opentelemetry-collector-contrib
+    repository: otel/opentelemetry-collector-k8s
     pullPolicy: IfNotPresent
     # Overrides the image tag whose default is the chart appVersion.
-    tag: ""
+    tag: "0.103.1"
     # When digest is set to a non-empty value, images will be pulled by digest (regardless of tag value).
     digest: ""
@@ -346,6 +357,74 @@ defaultCRConfig:
   #   - name: config
   #     mountPath: /etc/config
 
+  # Handles basic configuration of components that
+  # also require k8s modifications to work correctly.
+  # .Values.config can be used to modify/add to a preset
+  # component configuration, but CANNOT be used to remove
+  # preset configuration. If you require removal of any
+  # sections of a preset configuration, you cannot use
+  # the preset. Instead, configure the component manually in
+  # .Values.config and use the other fields supplied in the
+  # values.yaml to configure k8s as necessary.
+  presets:
+    # Configures the collector to collect logs.
+    # Adds the filelog receiver to the logs pipeline
+    # and adds the necessary volumes and volume mounts.
+    # Best used with mode = daemonset.
+    # See https://opentelemetry.io/docs/kubernetes/collector/components/#filelog-receiver for details on the receiver.
+    logsCollection:
+      enabled: false
+      includeCollectorLogs: true
+      # Enabling this writes checkpoints in the /var/lib/otelcol/ host directory.
+      # Note this changes the collector's user to root so that it can write to the host directory.
+      storeCheckpoints: false
+      # The maximum byte size of the recombined field.
+      # Once the size exceeds the limit, all received entries of the source will be combined and flushed.
+      maxRecombineLogSize: 102400
+    # Configures the collector to collect host metrics.
+    # Adds the hostmetrics receiver to the metrics pipeline
+    # and adds the necessary volumes and volume mounts.
+    # Best used with mode = daemonset.
+    # See https://opentelemetry.io/docs/kubernetes/collector/components/#host-metrics-receiver for details on the receiver.
+    hostMetrics:
+      enabled: false
+    # Configures the Kubernetes Attributes processor to add Kubernetes metadata.
+    # Adds the k8sattributes processor to all the pipelines
+    # and adds the necessary rules to the ClusterRole.
+    # Best used with mode = daemonset.
+    # See https://opentelemetry.io/docs/kubernetes/collector/components/#kubernetes-attributes-processor for details on the processor.
+    kubernetesAttributes:
+      enabled: false
+      # When enabled the processor will extract all labels for an associated pod and add them as resource attributes.
+      # The label's exact name will be the key.
+      extractAllPodLabels: false
+      # When enabled the processor will extract all annotations for an associated pod and add them as resource attributes.
+      # The annotation's exact name will be the key.
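+      # As a hypothetical illustration: with both extract options enabled, a pod
+      # labeled app.kubernetes.io/name=checkout and annotated team=payments ends up
+      # with the resource attributes app.kubernetes.io/name=checkout and team=payments.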
+      extractAllPodAnnotations: false
+    # Configures the collector to collect node, pod, and container metrics from the API server on a kubelet.
+    # Adds the kubeletstats receiver to the metrics pipeline
+    # and adds the necessary rules to the ClusterRole.
+    # Best used with mode = daemonset.
+    # See https://opentelemetry.io/docs/kubernetes/collector/components/#kubeletstats-receiver for details on the receiver.
+    kubeletMetrics:
+      enabled: false
+    # Configures the collector to collect Kubernetes events.
+    # Adds the k8sobjects receiver to the logs pipeline
+    # and collects Kubernetes events by default.
+    # Best used with mode = deployment or statefulset.
+    # MUST be used by a collector with a single replica.
+    # See https://opentelemetry.io/docs/kubernetes/collector/components/#kubernetes-objects-receiver for details on the receiver.
+    kubernetesEvents:
+      enabled: false
+    # Configures the Kubernetes Cluster Receiver to collect cluster-level metrics.
+    # Adds the k8s_cluster receiver to the metrics pipeline
+    # and adds the necessary rules to the ClusterRole.
+    # Best used with mode = deployment or statefulset.
+    # MUST be used by a collector with a single replica.
+    # See https://opentelemetry.io/docs/kubernetes/collector/components/#kubernetes-cluster-receiver for details on the receiver.
+    clusterMetrics:
+      enabled: false
+
 # Collectors is a map of collector configurations of the form:
 # collectors:
 #   collectorName:
@@ -355,12 +434,124 @@ defaultCRConfig:
 # This configuration allows for multiple layers of overrides for different clusters. For example, you could
 # create a collector called test with an OTLP exporter in your values.yaml, and then override the endpoint's
 # destination in a file called values-staging.yaml.
-collectors: {}
+collectors:
+  daemon:
+    suffix: daemon
+    mode: daemonset
+    enabled: true
+    resources:
+      limits:
+        cpu: 100m
+        memory: 250Mi
+      requests:
+        cpu: 100m
+        memory: 128Mi
+    # A scrape config file to instruct the daemon collector to pull metrics from any matching targets on the same node with
+    # prometheus.io/scrape=true
+    # This config also scrapes a running node exporter and the kubelet CAdvisor metrics, which aren't currently covered by the presets.
+    scrape_configs_file: "daemon_scrape_configs.yaml"
+    presets:
+      logsCollection:
+        enabled: true
+      kubeletMetrics:
+        enabled: true
+      hostMetrics:
+        enabled: true
+      kubernetesAttributes:
+        enabled: true
+    config:
+      receivers:
+        otlp:
+          protocols:
+            grpc:
+              endpoint: 0.0.0.0:4317
+            http:
+              endpoint: 0.0.0.0:4318
+      processors:
+        batch:
+          send_batch_size: 1000
+          timeout: 1s
+          send_batch_max_size: 1500
+        resourcedetection/env:
+          detectors: [env]
+          timeout: 2s
+          override: false
+      exporters:
+        debug: {}
+
+      service:
+        pipelines:
+          traces:
+            receivers:
+              - otlp
+            processors:
+              - resourcedetection/env
+              - batch
+            exporters:
+              - debug
+          metrics:
+            receivers:
+              - otlp
+            processors:
+              - resourcedetection/env
+              - batch
+            exporters:
+              - debug
+          logs:
+            receivers:
+              - otlp
+            processors:
+              - resourcedetection/env
+              - batch
+            exporters:
+              - debug
+  cluster:
+    suffix: cluster-stats
+    replicas: 1
+    mode: deployment
+    enabled: true
+    resources:
+      limits:
+        cpu: 100m
+        memory: 500Mi
+      requests:
+        cpu: 100m
+        memory: 500Mi
+    presets:
+      kubernetesAttributes:
+        enabled: true
+      kubernetesEvents:
+        enabled: true
+      clusterMetrics:
+        enabled: true
+    config:
+      receivers: {}
+      processors:
+        batch:
+          send_batch_size: 1000
+          timeout: 1s
+          send_batch_max_size: 1500
+        resourcedetection/env:
+          detectors: [env]
+          timeout: 2s
+          override: false
+      exporters:
+        debug: {}
+      service:
+        pipelines:
+          metrics:
+            receivers: [k8s_cluster]
+            processors: [resourcedetection/env, batch]
+            exporters: [debug]
+          logs:
+            receivers: [k8sobjects]
+            processors: [resourcedetection/env, batch]
+            exporters: [debug]
 
 # Cluster role configuration
 clusterRole:
   # Whether the cluster role is enabled or not
-  enabled: false
+  enabled: true
 
   # Annotations for the cluster role
   annotations: {}
@@ -622,7 +813,7 @@ opAMPBridge:
     repository: ghcr.io/open-telemetry/opentelemetry-operator/operator-opamp-bridge
     pullPolicy: IfNotPresent
     # Overrides the image tag whose default is the chart appVersion.
-    tag: ""
+    tag: "0.103.0"
     # When digest is set to a non-empty value, images will be pulled by digest (regardless of tag value).
     digest: ""
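For completeness, a minimal sketch of values enabling the OpAMP bridge configured above
(the endpoint value is illustrative; the template requires one via its
`required "opamp endpoint required"` check):

  opAMPBridge:
    enabled: true
    endpoint: wss://opamp.example.com/v1/opamp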