diff --git a/.github/workflows/operator-regression.yml b/.github/workflows/operator-regression.yml index 68a8d74..9dcbeb0 100644 --- a/.github/workflows/operator-regression.yml +++ b/.github/workflows/operator-regression.yml @@ -7,6 +7,9 @@ env: AGENT_TESTS: python nodejs java #go dotnet --- both pending +permissions: + contents: read + jobs: integration-test: runs-on: ubuntu-latest diff --git a/resources/kubernetes/operator/helm/values.yaml b/resources/kubernetes/operator/helm/values.yaml index 06f114d..b19871e 100644 --- a/resources/kubernetes/operator/helm/values.yaml +++ b/resources/kubernetes/operator/helm/values.yaml @@ -1,24 +1,28 @@ +# For advanced configuration options, refer to the [official OpenTelemetry Helm chart](https://github.com/open-telemetry/opentelemetry-helm-charts/blob/main/charts/opentelemetry-kube-stack/values.yaml) + opentelemetry-operator: manager: extraArgs: - --enable-go-instrumentation admissionWebhooks: certManager: - enabled: false + enabled: false # For production environments, it is [recommended to use cert-manager for better security and scalability](https://github.com/open-telemetry/opentelemetry-helm-charts/tree/main/charts/opentelemetry-operator#tls-certificate-requirement). + autoGenerateCert: - enabled: true - recreate: true + enabled: true # Enable/disable automatic certificate generation. Set to false if manually managing certificates. + recreate: true # Force certificate regeneration on updates. Only applicable if autoGenerateCert.enabled is true. crds: - create: true + create: true # Install the OpenTelemetry Operator CRDs. defaultCRConfig: image: repository: "docker.elastic.co/beats/elastic-agent" tag: "8.16.0-SNAPSHOT" targetAllocator: - enabled: false + enabled: false # Enable/disable the Operator's Target allocator. 
+ # Refer to: https://github.com/open-telemetry/opentelemetry-operator/tree/main/cmd/otel-allocator env: - name: ELASTIC_AGENT_OTEL value: '"true"' @@ -39,29 +43,43 @@ clusterRole: resources: ["configmaps"] verbs: ["get"] -# `clusterName` specifies the name of the kubernetes cluster -# It set the 'k8s.cluster.name' field, should be used for kubernetes environments, where cluster name can not be detected using resourcedetection -# Cluster Name is detected automatically for EKS/GKE/AKS +# `clusterName` specifies the name of the Kubernetes cluster. It sets the 'k8s.cluster.name' field. +# Cluster Name is automatically detected for EKS/GKE/AKS. Add the below value in environments where cluster name cannot be detected. # clusterName: myClusterName + collectors: + # Cluster is a K8s deployment EDOT collector focused on gathering telemetry + # at the cluster level (Kubernetes Events and cluster metrics). cluster: + # Configure the pods resources to control CPU and memory usage. + # resources: + # limits: + # cpu: 100m + # memory: 500Mi + # requests: + # cpu: 100m + # memory: 500Mi config: exporters: + # [Debug exporter](https://github.com/open-telemetry/opentelemetry-collector/blob/main/exporter/debugexporter/README.md) debug: - verbosity: basic + verbosity: basic # Options: basic, detailed. Choose verbosity level for debug logs. + # [Elasticsearch exporter](https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/main/exporter/elasticsearchexporter/README.md) elasticsearch/otel: - endpoints: + endpoints: # List of Elasticsearch endpoints. - ${env:ELASTIC_ENDPOINT} - api_key: ${env:ELASTIC_API_KEY} + api_key: ${env:ELASTIC_API_KEY} # API key for Elasticsearch authentication. 
logs_dynamic_index: enabled: true + # Enable in order to skip the SSL certificate check # tls: # insecure_skip_verify: true mapping: mode: otel processors: + # [Resource Detection Processor](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/processor/resourcedetectionprocessor) resourcedetection/eks: - detectors: [env, eks] + detectors: [env, eks] # Detects resources from environment variables and EKS (Elastic Kubernetes Service). timeout: 15s override: true eks: @@ -69,35 +87,43 @@ collectors: k8s.cluster.name: enabled: true resourcedetection/gcp: - detectors: [env, gcp] + detectors: [env, gcp] # Detects resources from environment variables and GCP (Google Cloud Platform). timeout: 2s override: true resourcedetection/aks: - detectors: [env, aks] + detectors: [env, aks] # Detects resources from environment variables and AKS (Azure Kubernetes Service). timeout: 2s override: true aks: resource_attributes: k8s.cluster.name: enabled: true - resource/k8s: + # [Resource Processor](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/processor/resourceprocessor) + resource/k8s: # Resource attributes tailored for services within Kubernetes. 
attributes: - - key: service.name + - key: service.name # Set the service.name resource attribute based on the well-known app.kubernetes.io/name label from_attribute: app.label.name action: insert - - key: service.name + - key: service.name # Set the service.name resource attribute based on the k8s.container.name attribute from_attribute: k8s.container.name action: insert - - key: app.label.name + - key: app.label.name # Delete app.label.name attribute previously used for service.name action: delete - - key: service.version + - key: service.version # Set the service.version resource attribute based on the well-known app.kubernetes.io/version label from_attribute: app.label.version action: insert - - key: app.label.version + - key: app.label.version # Delete app.label.version attribute previously used for service.version action: delete + resource/hostname: + attributes: + - key: host.name + from_attribute: k8s.node.name + action: upsert + # [K8s Attributes Processor](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/processor/k8sattributesprocessor) k8sattributes: - passthrough: false + passthrough: false # Annotates resources with the pod IP and does not try to extract any other metadata. pod_association: + # Below association takes a look at the k8s.pod.ip and k8s.pod.uid resource attributes or connection's context, and tries to match it with the pod having the same attribute. 
- sources: - from: resource_attribute name: k8s.pod.ip @@ -128,8 +154,17 @@ collectors: key: app.kubernetes.io/version from: pod receivers: + # [K8s Objects Receiver](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/receiver/k8sobjectsreceiver) + k8sobjects: + objects: + - name: events + mode: "watch" + group: "events.k8s.io" + exclude_watch_type: + - "DELETED" + # [K8s Cluster Receiver](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/receiver/k8sclusterreceiver) k8s_cluster: - auth_type: serviceAccount + auth_type: serviceAccount # Determines how to authenticate to the K8s API server. This can be one of none (for no auth), serviceAccount (to use the standard service account token provided to the agent pod), or kubeConfig to use credentials from ~/.kube/config. node_conditions_to_report: - Ready - MemoryPressure @@ -149,6 +184,7 @@ collectors: k8s.container.status.last_terminated_reason: enabled: true + # [Service Section](https://opentelemetry.io/docs/collector/configuration/#service) service: pipelines: metrics: @@ -161,6 +197,7 @@ collectors: - resourcedetection/gcp - resourcedetection/aks - resource/k8s + - resource/hostname receivers: - k8s_cluster logs: @@ -170,22 +207,36 @@ collectors: - resourcedetection/eks - resourcedetection/gcp - resourcedetection/aks + - resource/hostname exporters: - debug - elasticsearch/otel + + # Daemon is a K8s daemonset EDOT collector focused on gathering telemetry at + # node level and exposing an OTLP endpoint for data ingestion. + # Auto-instrumentation SDKs will use this endpoint. daemon: + # Configure the pods resources to control CPU and memory usage. + resources: + limits: + cpu: 100m + memory: 500Mi + requests: + cpu: 100m + memory: 500Mi presets: logsCollection: - enabled: true - storeCheckpoints: true - hostNetwork: true - securityContext: + enabled: true # Enable/disable the collection of node's logs. 
+ storeCheckpoints: true # Store checkpoints for log collection, allowing for resumption from the last processed log. + hostNetwork: true # Use the host's network namespace. This allows the daemon to access the network interfaces of the host directly. + securityContext: # Run the daemon as the root user and group for proper metrics collection. runAsUser: 0 runAsGroup: 0 - scrape_configs_file: "" + scrape_configs_file: "" # [Prometheus metrics](https://github.com/open-telemetry/opentelemetry-helm-charts/tree/main/charts/opentelemetry-kube-stack#scrape_configs_file-details) config: connectors: - signaltometrics: + # [Signal To Metrics Connector](https://github.com/elastic/opentelemetry-collector-components/tree/main/connector/signaltometricsconnector) + signaltometrics: # Produces metrics from all signal types (traces, logs, or metrics). logs: - name: service_summary include_resource_attributes: @@ -419,8 +470,10 @@ collectors: count: Int(AdjustedCount()) value: Double(0) exporters: + # [Debug exporter](https://github.com/open-telemetry/opentelemetry-collector/blob/main/exporter/debugexporter/README.md) debug: verbosity: basic + # [Elasticsearch exporter](https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/main/exporter/elasticsearchexporter/README.md) elasticsearch/otel: endpoints: - ${env:ELASTIC_ENDPOINT} @@ -437,6 +490,7 @@ collectors: # insecure_skip_verify: true mapping: mode: otel + # [Elasticsearch exporter](https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/main/exporter/elasticsearchexporter/README.md) elasticsearch/ecs: endpoints: - ${env:ELASTIC_ENDPOINT} @@ -446,8 +500,11 @@ collectors: mapping: mode: ecs processors: + # [Batch Processor](https://github.com/open-telemetry/opentelemetry-collector/tree/main/processor/batchprocessor) batch: {} - elastictrace: {} + # [Elastic Trace Processor](https://github.com/elastic/opentelemetry-collector-components/tree/main/processor/elastictraceprocessor) + elastictrace: {} # The 
processor enriches traces with Elastic-specific requirements. + # [LSM Interval Processor](https://github.com/elastic/opentelemetry-collector-components/tree/main/processor/lsmintervalprocessor) lsminterval: intervals: - duration: 1m @@ -465,12 +522,14 @@ collectors: - set(resource.attributes["metricset.interval"], "60m") - set(attributes["data_stream.dataset"], Concat([attributes["metricset.name"], "60m"], ".")) - set(attributes["processor.event"], "metric") + # [Elastic Infra Metrics Processor](https://github.com/elastic/opentelemetry-collector-components/tree/main/processor/elasticinframetricsprocessor) elasticinframetrics: add_system_metrics: true add_k8s_metrics: true drop_original: true + # [Resource Detection Processor](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/processor/resourcedetectionprocessor) resourcedetection/eks: - detectors: [env, eks] + detectors: [env, eks] # Detects resources from environment variables and EKS (Elastic Kubernetes Service). timeout: 15s override: true eks: @@ -478,50 +537,24 @@ collectors: k8s.cluster.name: enabled: true resourcedetection/gcp: - detectors: [env, gcp] + detectors: [env, gcp] # Detects resources from environment variables and GCP (Google Cloud Platform). timeout: 2s override: true resourcedetection/aks: - detectors: [env, aks] + detectors: [env, aks] # Detects resources from environment variables and AKS (Azure Kubernetes Service). 
timeout: 2s override: true aks: resource_attributes: k8s.cluster.name: enabled: true - resource/k8s: + resource/hostname: attributes: - - key: service.name - from_attribute: app.label.name - action: insert - - key: service.name - from_attribute: k8s.container.name - action: insert - - key: app.label.name - action: delete - - key: service.version - from_attribute: app.label.version - action: insert - - key: app.label.version - action: delete - attributes/dataset: - actions: - - key: event.dataset - from_attribute: data_stream.dataset + - key: host.name + from_attribute: k8s.node.name action: upsert - resource/cloud: - attributes: - - key: cloud.instance.id - from_attribute: host.id - action: insert - resource/process: - attributes: - - key: process.executable.name - action: delete - - key: process.executable.path - action: delete resourcedetection/system: - detectors: ["system", "ec2"] + detectors: ["system", "ec2"] # Detects resources from the system and EC2 instances. system: hostname_sources: [ "os" ] resource_attributes: @@ -557,11 +590,47 @@ collectors: enabled: false host.id: enabled: true + # [Resource Processor](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/processor/resourceprocessor) + resource/k8s: # Resource attributes tailored for services within Kubernetes. 
+ attributes: + - key: service.name # Set the service.name resource attribute based on the well-known app.kubernetes.io/name label + from_attribute: app.label.name + action: insert + - key: service.name # Set the service.name resource attribute based on the k8s.container.name attribute + from_attribute: k8s.container.name + action: insert + - key: app.label.name # Delete app.label.name attribute previously used for service.name + action: delete + - key: service.version # Set the service.version resource attribute based on the well-known app.kubernetes.io/version label + from_attribute: app.label.version + action: insert + - key: app.label.version # Delete app.label.version attribute previously used for service.version + action: delete + resource/cloud: + attributes: + - key: cloud.instance.id + from_attribute: host.id + action: insert + resource/process: + attributes: + - key: process.executable.name + action: delete + - key: process.executable.path + action: delete + # [Attributes Processor](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/processor/attributesprocessor) + attributes/dataset: + actions: + - key: event.dataset + from_attribute: data_stream.dataset + action: upsert + # [K8s Attributes Processor](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/processor/k8sattributesprocessor) k8sattributes: filter: + # Only retrieve pods running on the same node as the collector node_from_env_var: OTEL_K8S_NODE_NAME passthrough: false pod_association: + # Below association takes a look at the k8s.pod.ip and k8s.pod.uid resource attributes or connection's context, and tries to match it with the pod having the same attribute. 
- sources: - from: resource_attribute name: k8s.pod.ip @@ -592,12 +661,14 @@ collectors: key: app.kubernetes.io/version from: pod receivers: + # [OTLP Receiver](https://github.com/open-telemetry/opentelemetry-collector/tree/main/receiver/otlpreceiver) otlp: protocols: grpc: endpoint: 0.0.0.0:4317 http: endpoint: 0.0.0.0:4318 + # [File Log Receiver](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/receiver/filelogreceiver) filelog: retry_on_failure: enabled: true @@ -610,11 +681,12 @@ collectors: include_file_name: false include_file_path: true operators: - - id: container-parser + - id: container-parser # Extract container's metadata type: container + # [Hostmetrics Receiver](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/receiver/hostmetricsreceiver) hostmetrics: collection_interval: 10s - root_path: /hostfs + root_path: /hostfs # Mounted node's root file system scrapers: cpu: metrics: @@ -680,8 +752,9 @@ collectors: - sysfs - tracefs match_type: strict + # [Kubelet Stats Receiver](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/receiver/kubeletstatsreceiver) kubeletstats: - auth_type: serviceAccount + auth_type: serviceAccount # Authentication mechanism with the Kubelet endpoint, refer to: https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/receiver/kubeletstatsreceiver#configuration collection_interval: 20s endpoint: ${env:OTEL_K8S_NODE_NAME}:10250 node: '${env:OTEL_K8S_NODE_NAME}' @@ -714,6 +787,8 @@ collectors: enabled: true extra_metadata_labels: - container.id + + # [Service Section](https://opentelemetry.io/docs/collector/configuration/#service) service: pipelines: logs/node: @@ -727,6 +802,7 @@ collectors: - resourcedetection/gcp - resourcedetection/aks - resource/k8s + - resource/hostname - resource/cloud exporters: - debug @@ -742,6 +818,7 @@ collectors: - resourcedetection/gcp - resourcedetection/aks - resource/k8s + - resource/hostname - 
resource/cloud exporters: - debug @@ -759,6 +836,7 @@ collectors: - resourcedetection/gcp - resourcedetection/aks - resource/k8s + - resource/hostname - resource/cloud - attributes/dataset - resource/process @@ -770,6 +848,7 @@ collectors: - otlp processors: - batch + - resource/hostname exporters: - debug - signaltometrics @@ -779,6 +858,7 @@ collectors: - otlp processors: - batch + - resource/hostname exporters: - debug - signaltometrics @@ -789,6 +869,7 @@ collectors: processors: - batch - elastictrace + - resource/hostname exporters: - debug - signaltometrics @@ -803,25 +884,27 @@ collectors: - debug - elasticsearch/otel +# For more details on OpenTelemetry's zero-code instrumentation, see: +# https://opentelemetry.io/docs/concepts/instrumentation/zero-code/ instrumentation: name: elastic-instrumentation - enabled: true + enabled: true # Enable/disable auto-instrumentation. exporter: - endpoint: http://opentelemetry-kube-stack-daemon-collector.opentelemetry-operator-system.svc.cluster.local:4318 + endpoint: http://opentelemetry-kube-stack-daemon-collector.opentelemetry-operator-system.svc.cluster.local:4318 # The daemonset OpenTelemetry Collector endpoint where telemetry data will be exported. propagators: - - tracecontext - - baggage - - b3 + - tracecontext # W3C TraceContext propagator for distributed tracing. + - baggage # Baggage propagator to include baggage information in trace context. + - b3 # B3 propagator for Zipkin-based distributed tracing compatibility. sampler: - type: parentbased_traceidratio - argument: "1.0" + type: parentbased_traceidratio # Sampler type + argument: "1.0" # Sampling rate set to 100% (all traces are sampled). 
java: image: docker.elastic.co/observability/elastic-otel-javaagent:1.0.0 nodejs: - image: docker.elastic.co/observability/elastic-otel-node:edge + image: docker.elastic.co/observability/elastic-otel-node:0.4.1 dotnet: image: docker.elastic.co/observability/elastic-otel-dotnet:edge python: - image: docker.elastic.co/observability/elastic-otel-python:edge + image: docker.elastic.co/observability/elastic-otel-python:0.3.0 go: image: ghcr.io/open-telemetry/opentelemetry-go-instrumentation/autoinstrumentation-go:v0.14.0-alpha