From ea6ab35cf9ec5f86f908f956b92bf691d9b12b55 Mon Sep 17 00:00:00 2001 From: Curtis Robert Date: Tue, 3 Oct 2023 10:00:45 -0700 Subject: [PATCH 1/8] Add k8s cluster name detection in EKS environment Use ec2 instance tags to determine the EKS k8s cluster name. --- .chloggen/eks_cluster_name.yaml | 27 ++++++++ .../resourcedetectionprocessor/README.md | 18 +++-- .../resourcedetectionprocessor/factory.go | 14 +++- .../internal/aws/eks/detector.go | 65 ++++++++++++++++++- .../internal/aws/eks/detector_test.go | 17 ++++- .../eks/internal/metadata/generated_config.go | 8 ++- .../metadata/generated_config_test.go | 10 +-- .../internal/metadata/generated_resource.go | 7 ++ .../metadata/generated_resource_test.go | 8 ++- .../internal/metadata/testdata/config.yaml | 4 ++ .../internal/aws/eks/metadata.yaml | 6 +- 11 files changed, 166 insertions(+), 18 deletions(-) create mode 100755 .chloggen/eks_cluster_name.yaml diff --git a/.chloggen/eks_cluster_name.yaml b/.chloggen/eks_cluster_name.yaml new file mode 100755 index 000000000000..949bcde02c54 --- /dev/null +++ b/.chloggen/eks_cluster_name.yaml @@ -0,0 +1,27 @@ +# Use this changelog template to create an entry for release notes. + +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: enhancement + +# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver) +component: resourcedetectionprocessor + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: Add k8s cluster name detection when running in EKS + +# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists. +issues: [26794] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. +subtext: + +# If your change doesn't affect end users or the exported elements of any package, +# you should instead start your pull request title with [chore] or use the "Skip Changelog" label. +# Optional: The change log or logs in which this entry should be included. +# e.g. '[user]' or '[user, api]' +# Include 'user' if the change is relevant to end users. +# Include 'api' if there is a change to a library API. +# Default: '[user]' +change_logs: [user] diff --git a/processor/resourcedetectionprocessor/README.md b/processor/resourcedetectionprocessor/README.md index 0fdc7b13ca9e..87ff503f3d49 100644 --- a/processor/resourcedetectionprocessor/README.md +++ b/processor/resourcedetectionprocessor/README.md @@ -18,7 +18,7 @@ The resource detection processor can be used to detect resource information from the host, -in a format that conforms to the [OpenTelemetry resource semantic conventions](https://github.com/open-telemetry/opentelemetry-specification/tree/main/specification/resource/semantic_conventions/), and append or +in a format that conforms to the [OpenTelemetry resource semantic conventions](https://github.com/open-telemetry/semantic-conventions/tree/main/docs/resource), and append or override the resource value in telemetry data with this information. ## Supported detectors @@ -133,7 +133,7 @@ We map these environment variables to resource attributes as follows: | `HEROKU_RELEASE_VERSION` | `service.version` | | `HEROKU_SLUG_COMMIT` | `heroku.release.commit` | -For more information, see the [Heroku cloud provider documentation](https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/resource/semantic_conventions/cloud_provider/heroku.md) under the [OpenTelemetry specification semantic conventions](https://github.com/open-telemetry/opentelemetry-specification). +For more information, see the [Heroku cloud provider documentation](https://github.com/open-telemetry/semantic-conventions/blob/main/docs/resource/cloud-provider/heroku.md) under the [OpenTelemetry specification semantic conventions](https://github.com/open-telemetry/semantic-conventions). ```yaml processors: @@ -318,6 +318,10 @@ processors: * cloud.provider ("aws") * cloud.platform ("aws_eks") + * k8s.cluster.name + +Note: The kubernetes cluster name is only available when running on EC2 instances, and requires permission to run the `EC2:DescribeInstances` [action](https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_DescribeInstances.html). +If you see an error with the message `context deadline exceeded`, please increase the timeout setting in your config. Example: @@ -325,7 +329,7 @@ Example: processors: resourcedetection/eks: detectors: [env, eks] - timeout: 2s + timeout: 15s override: false ``` @@ -334,21 +338,21 @@ processors: Uses the AWS Lambda [runtime environment variables](https://docs.aws.amazon.com/lambda/latest/dg/configuration-envvars.html#configuration-envvars-runtime) to retrieve the following resource attributes: -[Cloud semantic conventions](https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/resource/semantic_conventions/cloud.md) +[Cloud semantic conventions](https://github.com/open-telemetry/semantic-conventions/blob/main/docs/resource/cloud.md) * `cloud.provider` (`"aws"`) * `cloud.platform` (`"aws_lambda"`) * `cloud.region` (`$AWS_REGION`) -[Function as a Service semantic conventions](https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/resource/semantic_conventions/faas.md) -and [AWS Lambda semantic conventions](https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/trace/semantic_conventions/instrumentation/aws-lambda.md#resource-detector) +[Function as a Service semantic conventions](https://github.com/open-telemetry/semantic-conventions/blob/main/docs/resource/faas.md) +and [AWS Lambda semantic conventions](https://github.com/open-telemetry/semantic-conventions/blob/main/docs/faas/aws-lambda.md) * `faas.name` (`$AWS_LAMBDA_FUNCTION_NAME`) * `faas.version` (`$AWS_LAMBDA_FUNCTION_VERSION`) * `faas.instance` (`$AWS_LAMBDA_LOG_STREAM_NAME`) * `faas.max_memory` (`$AWS_LAMBDA_FUNCTION_MEMORY_SIZE`) -[AWS Logs semantic conventions](https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/resource/semantic_conventions/cloud_provider/aws/logs.md) +[AWS Logs semantic conventions](https://github.com/open-telemetry/semantic-conventions/blob/main/docs/resource/cloud-provider/aws/logs.md) * `aws.log.group.names` (`$AWS_LAMBDA_LOG_GROUP_NAME`) * `aws.log.stream.names` (`$AWS_LAMBDA_LOG_STREAM_NAME`) diff --git a/processor/resourcedetectionprocessor/factory.go b/processor/resourcedetectionprocessor/factory.go index d404706fcd8b..04c79b7df094 100644 --- a/processor/resourcedetectionprocessor/factory.go +++ b/processor/resourcedetectionprocessor/factory.go @@ -34,6 +34,11 @@ import ( "github.com/open-telemetry/opentelemetry-collector-contrib/processor/resourcedetectionprocessor/internal/system" ) +const ( + defaultTimeout = 5 * time.Second + eksDefaultTimeout = 11 * time.Second +) + var consumerCapabilities = consumer.Capabilities{MutatesData: true} type factory struct { @@ -97,7 +102,7 @@ func createDefaultConfig() component.Config { func defaultHTTPClientSettings() confighttp.HTTPClientSettings { httpClientSettings := confighttp.NewDefaultHTTPClientSettings() - httpClientSettings.Timeout = 5 * time.Second + httpClientSettings.Timeout = defaultTimeout return httpClientSettings } @@ -172,6 +177,13 @@ func (f *factory) getResourceDetectionProcessor( if oCfg.Attributes != nil { params.Logger.Warn("You are using deprecated `attributes` option that will be removed soon; use `resource_attributes` instead, details on configuration: https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/processor/resourcedetectionprocessor#migration-from-attributes-to-resource_attributes") } + + for _, detector := range oCfg.Detectors { + if detector == eks.TypeStr && oCfg.HTTPClientSettings.Timeout == defaultTimeout { + oCfg.HTTPClientSettings.Timeout = eksDefaultTimeout + } + } + provider, err := f.getResourceProvider(params, oCfg.HTTPClientSettings.Timeout, oCfg.Detectors, oCfg.DetectorConfig, oCfg.Attributes) if err != nil { return nil, err diff --git a/processor/resourcedetectionprocessor/internal/aws/eks/detector.go b/processor/resourcedetectionprocessor/internal/aws/eks/detector.go index 5727669c7f6f..c9addeb896cd 100644 --- a/processor/resourcedetectionprocessor/internal/aws/eks/detector.go +++ b/processor/resourcedetectionprocessor/internal/aws/eks/detector.go @@ -7,7 +7,12 @@ import ( "context" "fmt" "os" + "strings" + "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/aws/ec2metadata" + "github.com/aws/aws-sdk-go/aws/session" + "github.com/aws/aws-sdk-go/service/ec2" "go.opentelemetry.io/collector/pdata/pcommon" "go.opentelemetry.io/collector/processor" conventions "go.opentelemetry.io/collector/semconv/v1.6.1" @@ -28,10 +33,16 @@ const ( kubernetesServiceHostEnvVar = "KUBERNETES_SERVICE_HOST" authConfigmapNS = "kube-system" authConfigmapName = "aws-auth" + + clusterNameAwsEksTag = "aws:eks:cluster-name" + clusterNameEksTag = "eks:cluster-name" + kubernetesClusterNameTag = "kubernetes.io/cluster/" ) type detectorUtils interface { getConfigMap(ctx context.Context, namespace string, name string) (map[string]string, error) + getClusterName(ctx context.Context) (string, error) + getClusterNameTagFromReservations([]*ec2.Reservation) string } type eksDetectorUtils struct { @@ -54,6 +65,7 @@ var _ detectorUtils = (*eksDetectorUtils)(nil) func NewDetector(set processor.CreateSettings, dcfg internal.DetectorConfig) (internal.Detector, error) { cfg := dcfg.(Config) utils, err := newK8sDetectorUtils() + return &detector{ utils: utils, logger: set.Logger, @@ -74,7 +86,12 @@ func (d *detector) Detect(ctx context.Context) (resource pcommon.Resource, schem d.rb.SetCloudProvider(conventions.AttributeCloudProviderAWS) d.rb.SetCloudPlatform(conventions.AttributeCloudPlatformAWSEKS) - return d.rb.Emit(), conventions.SchemaURL, nil + // The error is unhandled because we want to return successfully detected resources + // regardless of an error. The caller will properly handle any error hit while getting + // the cluster name. + clusterName, err := d.utils.getClusterName(ctx) + d.rb.SetK8sClusterName(clusterName) + return d.rb.Emit(), conventions.SchemaURL, err } func isEKS(ctx context.Context, utils detectorUtils) (bool, error) { @@ -114,3 +131,49 @@ func (e eksDetectorUtils) getConfigMap(ctx context.Context, namespace string, na } return cm.Data, nil } + +func (e eksDetectorUtils) getClusterName(ctx context.Context) (string, error) { + sess := session.Must(session.NewSession()) + ec2Svc := ec2metadata.New(sess) + region, err := ec2Svc.Region() + if err != nil { + return "", err + } + + svc := ec2.New(sess, aws.NewConfig().WithRegion(region)) + instanceIdentityDocument, err := ec2Svc.GetInstanceIdentityDocumentWithContext(ctx) + if err != nil { + return "", err + } + + instances, err := svc.DescribeInstances(&ec2.DescribeInstancesInput{ + InstanceIds: []*string{ + aws.String(instanceIdentityDocument.InstanceID), + }, + }) + if err != nil { + return "", err + } + + clusterName := e.getClusterNameTagFromReservations(instances.Reservations) + if len(clusterName) == 0 { + return clusterName, fmt.Errorf("Failed to detect EKS cluster name. No tag for cluster name found on EC2 instance") + } + return clusterName, nil +} + +func (e eksDetectorUtils) getClusterNameTagFromReservations(reservations []*ec2.Reservation) string { + for _, reservation := range reservations { + for _, instance := range reservation.Instances { + for _, tag := range instance.Tags { + if *tag.Key == clusterNameAwsEksTag || *tag.Key == clusterNameEksTag { + return *tag.Value + } else if strings.HasPrefix(*tag.Key, kubernetesClusterNameTag) { + return strings.TrimPrefix(*tag.Key, kubernetesClusterNameTag) + } + } + } + } + + return "" +} diff --git a/processor/resourcedetectionprocessor/internal/aws/eks/detector_test.go b/processor/resourcedetectionprocessor/internal/aws/eks/detector_test.go index 4d4b5b111970..dee1833efea4 100644 --- a/processor/resourcedetectionprocessor/internal/aws/eks/detector_test.go +++ b/processor/resourcedetectionprocessor/internal/aws/eks/detector_test.go @@ -7,6 +7,7 @@ import ( "context" "testing" + "github.com/aws/aws-sdk-go/service/ec2" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/mock" "github.com/stretchr/testify/require" @@ -16,6 +17,11 @@ import ( "github.com/open-telemetry/opentelemetry-collector-contrib/processor/resourcedetectionprocessor/internal/aws/eks/internal/metadata" ) +const ( + clusterNameKey = "k8s.cluster.name" + clusterName = "my-cluster" +) + type MockDetectorUtils struct { mock.Mock } @@ -25,6 +31,15 @@ func (detectorUtils *MockDetectorUtils) getConfigMap(_ context.Context, namespac return args.Get(0).(map[string]string), args.Error(1) } +func (detectorUtils *MockDetectorUtils) getClusterName(_ context.Context) (string, error) { + var reservations []*ec2.Reservation + return detectorUtils.getClusterNameTagFromReservations(reservations), nil +} + +func (detectorUtils *MockDetectorUtils) getClusterNameTagFromReservations(_ []*ec2.Reservation) string { + return clusterName +} + func TestNewDetector(t *testing.T) { dcfg := CreateDefaultConfig() detector, err := NewDetector(processortest.NewNopCreateSettings(), dcfg) @@ -38,7 +53,7 @@ func TestEKS(t *testing.T) { ctx := context.Background() t.Setenv("KUBERNETES_SERVICE_HOST", "localhost") - detectorUtils.On("getConfigMap", authConfigmapNS, authConfigmapName).Return(map[string]string{"cluster.name": "my-cluster"}, nil) + detectorUtils.On("getConfigMap", authConfigmapNS, authConfigmapName).Return(map[string]string{clusterNameKey: clusterName}, nil) // Call EKS Resource detector to detect resources eksResourceDetector := &detector{utils: detectorUtils, err: nil, rb: metadata.NewResourceBuilder(metadata.DefaultResourceAttributesConfig())} res, _, err := eksResourceDetector.Detect(ctx) diff --git a/processor/resourcedetectionprocessor/internal/aws/eks/internal/metadata/generated_config.go b/processor/resourcedetectionprocessor/internal/aws/eks/internal/metadata/generated_config.go index 47fbd55571ca..f21c9ae76a1d 100644 --- a/processor/resourcedetectionprocessor/internal/aws/eks/internal/metadata/generated_config.go +++ b/processor/resourcedetectionprocessor/internal/aws/eks/internal/metadata/generated_config.go @@ -25,8 +25,9 @@ func (rac *ResourceAttributeConfig) Unmarshal(parser *confmap.Conf) error { // ResourceAttributesConfig provides config for resourcedetectionprocessor/eks resource attributes. type ResourceAttributesConfig struct { - CloudPlatform ResourceAttributeConfig `mapstructure:"cloud.platform"` - CloudProvider ResourceAttributeConfig `mapstructure:"cloud.provider"` + CloudPlatform ResourceAttributeConfig `mapstructure:"cloud.platform"` + CloudProvider ResourceAttributeConfig `mapstructure:"cloud.provider"` + K8sClusterName ResourceAttributeConfig `mapstructure:"k8s.cluster.name"` } func DefaultResourceAttributesConfig() ResourceAttributesConfig { @@ -37,5 +38,8 @@ func DefaultResourceAttributesConfig() ResourceAttributesConfig { CloudProvider: ResourceAttributeConfig{ Enabled: true, }, + K8sClusterName: ResourceAttributeConfig{ + Enabled: false, + }, } } diff --git a/processor/resourcedetectionprocessor/internal/aws/eks/internal/metadata/generated_config_test.go b/processor/resourcedetectionprocessor/internal/aws/eks/internal/metadata/generated_config_test.go index 9ce16e7f0d6a..fa542527d5fb 100644 --- a/processor/resourcedetectionprocessor/internal/aws/eks/internal/metadata/generated_config_test.go +++ b/processor/resourcedetectionprocessor/internal/aws/eks/internal/metadata/generated_config_test.go @@ -25,15 +25,17 @@ func TestResourceAttributesConfig(t *testing.T) { { name: "all_set", want: ResourceAttributesConfig{ - CloudPlatform: ResourceAttributeConfig{Enabled: true}, - CloudProvider: ResourceAttributeConfig{Enabled: true}, + CloudPlatform: ResourceAttributeConfig{Enabled: true}, + CloudProvider: ResourceAttributeConfig{Enabled: true}, + K8sClusterName: ResourceAttributeConfig{Enabled: true}, }, }, { name: "none_set", want: ResourceAttributesConfig{ - CloudPlatform: ResourceAttributeConfig{Enabled: false}, - CloudProvider: ResourceAttributeConfig{Enabled: false}, + CloudPlatform: ResourceAttributeConfig{Enabled: false}, + CloudProvider: ResourceAttributeConfig{Enabled: false}, + K8sClusterName: ResourceAttributeConfig{Enabled: false}, }, }, } diff --git a/processor/resourcedetectionprocessor/internal/aws/eks/internal/metadata/generated_resource.go b/processor/resourcedetectionprocessor/internal/aws/eks/internal/metadata/generated_resource.go index aff8c18f53ad..b4286d831a6b 100644 --- a/processor/resourcedetectionprocessor/internal/aws/eks/internal/metadata/generated_resource.go +++ b/processor/resourcedetectionprocessor/internal/aws/eks/internal/metadata/generated_resource.go @@ -35,6 +35,13 @@ func (rb *ResourceBuilder) SetCloudProvider(val string) { } } +// SetK8sClusterName sets provided value as "k8s.cluster.name" attribute. +func (rb *ResourceBuilder) SetK8sClusterName(val string) { + if rb.config.K8sClusterName.Enabled { + rb.res.Attributes().PutStr("k8s.cluster.name", val) + } +} + // Emit returns the built resource and resets the internal builder state. func (rb *ResourceBuilder) Emit() pcommon.Resource { r := rb.res diff --git a/processor/resourcedetectionprocessor/internal/aws/eks/internal/metadata/generated_resource_test.go b/processor/resourcedetectionprocessor/internal/aws/eks/internal/metadata/generated_resource_test.go index 40fc980e81bc..a467659755bc 100644 --- a/processor/resourcedetectionprocessor/internal/aws/eks/internal/metadata/generated_resource_test.go +++ b/processor/resourcedetectionprocessor/internal/aws/eks/internal/metadata/generated_resource_test.go @@ -15,6 +15,7 @@ func TestResourceBuilder(t *testing.T) { rb := NewResourceBuilder(cfg) rb.SetCloudPlatform("cloud.platform-val") rb.SetCloudProvider("cloud.provider-val") + rb.SetK8sClusterName("k8s.cluster.name-val") res := rb.Emit() assert.Equal(t, 0, rb.Emit().Attributes().Len()) // Second call should return empty Resource @@ -23,7 +24,7 @@ func TestResourceBuilder(t *testing.T) { case "default": assert.Equal(t, 2, res.Attributes().Len()) case "all_set": - assert.Equal(t, 2, res.Attributes().Len()) + assert.Equal(t, 3, res.Attributes().Len()) case "none_set": assert.Equal(t, 0, res.Attributes().Len()) return @@ -41,6 +42,11 @@ func TestResourceBuilder(t *testing.T) { if ok { assert.EqualValues(t, "cloud.provider-val", val.Str()) } + val, ok = res.Attributes().Get("k8s.cluster.name") + assert.Equal(t, test == "all_set", ok) + if ok { + assert.EqualValues(t, "k8s.cluster.name-val", val.Str()) + } }) } } diff --git a/processor/resourcedetectionprocessor/internal/aws/eks/internal/metadata/testdata/config.yaml b/processor/resourcedetectionprocessor/internal/aws/eks/internal/metadata/testdata/config.yaml index d00b63470c51..1b7d4c7eda41 100644 --- a/processor/resourcedetectionprocessor/internal/aws/eks/internal/metadata/testdata/config.yaml +++ b/processor/resourcedetectionprocessor/internal/aws/eks/internal/metadata/testdata/config.yaml @@ -5,9 +5,13 @@ all_set: enabled: true cloud.provider: enabled: true + k8s.cluster.name: + enabled: true none_set: resource_attributes: cloud.platform: enabled: false cloud.provider: enabled: false + k8s.cluster.name: + enabled: false diff --git a/processor/resourcedetectionprocessor/internal/aws/eks/metadata.yaml b/processor/resourcedetectionprocessor/internal/aws/eks/metadata.yaml index e544e7c1f552..9911e7164b53 100644 --- a/processor/resourcedetectionprocessor/internal/aws/eks/metadata.yaml +++ b/processor/resourcedetectionprocessor/internal/aws/eks/metadata.yaml @@ -10,4 +10,8 @@ resource_attributes: cloud.platform: description: The cloud.platform type: string - enabled: true \ No newline at end of file + enabled: true + k8s.cluster.name: + description: The EKS cluster name. This attribute is currently only available when running on EC2 instances, and requires permission to run the EC2:DescribeInstances action. + type: string + enabled: false \ No newline at end of file From 68dde8d78c95d32e12b6a2869527b52b1334c2be Mon Sep 17 00:00:00 2001 From: Curtis Robert Date: Thu, 26 Oct 2023 15:29:22 -0700 Subject: [PATCH 2/8] Remove longer EKS timeout as it's not necessary --- processor/resourcedetectionprocessor/factory.go | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/processor/resourcedetectionprocessor/factory.go b/processor/resourcedetectionprocessor/factory.go index 04c79b7df094..955957d61d80 100644 --- a/processor/resourcedetectionprocessor/factory.go +++ b/processor/resourcedetectionprocessor/factory.go @@ -35,8 +35,7 @@ import ( ) const ( - defaultTimeout = 5 * time.Second - eksDefaultTimeout = 11 * time.Second + defaultTimeout = 5 * time.Second ) var consumerCapabilities = consumer.Capabilities{MutatesData: true} @@ -178,12 +177,6 @@ func (f *factory) getResourceDetectionProcessor( params.Logger.Warn("You are using deprecated `attributes` option that will be removed soon; use `resource_attributes` instead, details on configuration: https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/processor/resourcedetectionprocessor#migration-from-attributes-to-resource_attributes") } - for _, detector := range oCfg.Detectors { - if detector == eks.TypeStr && oCfg.HTTPClientSettings.Timeout == defaultTimeout { - oCfg.HTTPClientSettings.Timeout = eksDefaultTimeout - } - } - provider, err := f.getResourceProvider(params, oCfg.HTTPClientSettings.Timeout, oCfg.Detectors, oCfg.DetectorConfig, oCfg.Attributes) if err != nil { return nil, err From 286ab156a2c4b325377b782284932bc9924c8ddc Mon Sep 17 00:00:00 2001 From: Curtis Robert Date: Fri, 27 Oct 2023 08:47:46 -0700 Subject: [PATCH 3/8] Changes requested by Dmitrii - Don't panic on error - Remove unrelated changes - Reuse conventions variable for k8s cluster name --- processor/resourcedetectionprocessor/README.md | 12 ++++++------ processor/resourcedetectionprocessor/factory.go | 7 +------ .../internal/aws/eks/detector.go | 7 ++++++- .../internal/aws/eks/detector_test.go | 6 +++--- 4 files changed, 16 insertions(+), 16 deletions(-) diff --git a/processor/resourcedetectionprocessor/README.md b/processor/resourcedetectionprocessor/README.md index 87ff503f3d49..820b7cf24ef2 100644 --- a/processor/resourcedetectionprocessor/README.md +++ b/processor/resourcedetectionprocessor/README.md @@ -18,7 +18,7 @@ The resource detection processor can be used to detect resource information from the host, -in a format that conforms to the [OpenTelemetry resource semantic conventions](https://github.com/open-telemetry/semantic-conventions/tree/main/docs/resource), and append or +in a format that conforms to the [OpenTelemetry resource semantic conventions](https://github.com/open-telemetry/opentelemetry-specification/tree/main/specification/resource/semantic_conventions/), and append or override the resource value in telemetry data with this information. ## Supported detectors @@ -133,7 +133,7 @@ We map these environment variables to resource attributes as follows: | `HEROKU_RELEASE_VERSION` | `service.version` | | `HEROKU_SLUG_COMMIT` | `heroku.release.commit` | -For more information, see the [Heroku cloud provider documentation](https://github.com/open-telemetry/semantic-conventions/blob/main/docs/resource/cloud-provider/heroku.md) under the [OpenTelemetry specification semantic conventions](https://github.com/open-telemetry/semantic-conventions). +For more information, see the [Heroku cloud provider documentation](https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/resource/semantic_conventions/cloud_provider/heroku.md) under the [OpenTelemetry specification semantic conventions](https://github.com/open-telemetry/opentelemetry-specification). ```yaml processors: @@ -338,21 +338,21 @@ processors: Uses the AWS Lambda [runtime environment variables](https://docs.aws.amazon.com/lambda/latest/dg/configuration-envvars.html#configuration-envvars-runtime) to retrieve the following resource attributes: -[Cloud semantic conventions](https://github.com/open-telemetry/semantic-conventions/blob/main/docs/resource/cloud.md) +[Cloud semantic conventions](https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/resource/semantic_conventions/cloud.md) * `cloud.provider` (`"aws"`) * `cloud.platform` (`"aws_lambda"`) * `cloud.region` (`$AWS_REGION`) -[Function as a Service semantic conventions](https://github.com/open-telemetry/semantic-conventions/blob/main/docs/resource/faas.md) -and [AWS Lambda semantic conventions](https://github.com/open-telemetry/semantic-conventions/blob/main/docs/faas/aws-lambda.md) +[Function as a Service semantic conventions](https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/resource/semantic_conventions/faas.md) +and [AWS Lambda semantic conventions](https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/trace/semantic_conventions/instrumentation/aws-lambda.md#resource-detector) * `faas.name` (`$AWS_LAMBDA_FUNCTION_NAME`) * `faas.version` (`$AWS_LAMBDA_FUNCTION_VERSION`) * `faas.instance` (`$AWS_LAMBDA_LOG_STREAM_NAME`) * `faas.max_memory` (`$AWS_LAMBDA_FUNCTION_MEMORY_SIZE`) -[AWS Logs semantic conventions](https://github.com/open-telemetry/semantic-conventions/blob/main/docs/resource/cloud-provider/aws/logs.md) +[AWS Logs semantic conventions](https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/resource/semantic_conventions/cloud_provider/aws/logs.md) * `aws.log.group.names` (`$AWS_LAMBDA_LOG_GROUP_NAME`) * `aws.log.stream.names` (`$AWS_LAMBDA_LOG_STREAM_NAME`) diff --git a/processor/resourcedetectionprocessor/factory.go b/processor/resourcedetectionprocessor/factory.go index 955957d61d80..d404706fcd8b 100644 --- a/processor/resourcedetectionprocessor/factory.go +++ b/processor/resourcedetectionprocessor/factory.go @@ -34,10 +34,6 @@ import ( "github.com/open-telemetry/opentelemetry-collector-contrib/processor/resourcedetectionprocessor/internal/system" ) -const ( - defaultTimeout = 5 * time.Second -) - var consumerCapabilities = consumer.Capabilities{MutatesData: true} type factory struct { @@ -101,7 +97,7 @@ func createDefaultConfig() component.Config { func defaultHTTPClientSettings() confighttp.HTTPClientSettings { httpClientSettings := confighttp.NewDefaultHTTPClientSettings() - httpClientSettings.Timeout = defaultTimeout + httpClientSettings.Timeout = 5 * time.Second return httpClientSettings } @@ -176,7 +172,6 @@ func (f *factory) getResourceDetectionProcessor( if oCfg.Attributes != nil { params.Logger.Warn("You are using deprecated `attributes` option that will be removed soon; use `resource_attributes` instead, details on configuration: https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/processor/resourcedetectionprocessor#migration-from-attributes-to-resource_attributes") } - provider, err := f.getResourceProvider(params, oCfg.HTTPClientSettings.Timeout, oCfg.Detectors, oCfg.DetectorConfig, oCfg.Attributes) if err != nil { return nil, err diff --git a/processor/resourcedetectionprocessor/internal/aws/eks/detector.go b/processor/resourcedetectionprocessor/internal/aws/eks/detector.go index c9addeb896cd..5f9ad2d1fb2f 100644 --- a/processor/resourcedetectionprocessor/internal/aws/eks/detector.go +++ b/processor/resourcedetectionprocessor/internal/aws/eks/detector.go @@ -91,6 +91,7 @@ func (d *detector) Detect(ctx context.Context) (resource pcommon.Resource, schem // the cluster name. clusterName, err := d.utils.getClusterName(ctx) d.rb.SetK8sClusterName(clusterName) + return d.rb.Emit(), conventions.SchemaURL, err } @@ -133,7 +134,11 @@ func (e eksDetectorUtils) getConfigMap(ctx context.Context, namespace string, na } func (e eksDetectorUtils) getClusterName(ctx context.Context) (string, error) { - sess := session.Must(session.NewSession()) + sess, err := session.NewSession() + if err != nil { + return "", err + } + ec2Svc := ec2metadata.New(sess) region, err := ec2Svc.Region() if err != nil { diff --git a/processor/resourcedetectionprocessor/internal/aws/eks/detector_test.go b/processor/resourcedetectionprocessor/internal/aws/eks/detector_test.go index dee1833efea4..30bdd77bae7b 100644 --- a/processor/resourcedetectionprocessor/internal/aws/eks/detector_test.go +++ b/processor/resourcedetectionprocessor/internal/aws/eks/detector_test.go @@ -12,14 +12,14 @@ import ( "github.com/stretchr/testify/mock" "github.com/stretchr/testify/require" "go.opentelemetry.io/collector/processor/processortest" + conventions "go.opentelemetry.io/collector/semconv/v1.6.1" "go.uber.org/zap" "github.com/open-telemetry/opentelemetry-collector-contrib/processor/resourcedetectionprocessor/internal/aws/eks/internal/metadata" ) const ( - clusterNameKey = "k8s.cluster.name" - clusterName = "my-cluster" + clusterName = "my-cluster" ) type MockDetectorUtils struct { @@ -53,7 +53,7 @@ func TestEKS(t *testing.T) { ctx := context.Background() t.Setenv("KUBERNETES_SERVICE_HOST", "localhost") - detectorUtils.On("getConfigMap", authConfigmapNS, authConfigmapName).Return(map[string]string{clusterNameKey: clusterName}, nil) + detectorUtils.On("getConfigMap", authConfigmapNS, authConfigmapName).Return(map[string]string{conventions.AttributeK8SClusterName: clusterName}, nil) // Call EKS Resource detector to detect resources eksResourceDetector := &detector{utils: detectorUtils, err: nil, rb: metadata.NewResourceBuilder(metadata.DefaultResourceAttributesConfig())} res, _, err := eksResourceDetector.Detect(ctx) From e0a552c75cbe58cb218429fca2bb4e81e8ac2e7f Mon Sep 17 00:00:00 2001 From: Curtis Robert Date: Fri, 27 Oct 2023 16:38:37 -0700 Subject: [PATCH 4/8] Only attempt to get cluster name if it's enabled --- .../internal/aws/eks/detector.go | 9 +++++++-- .../internal/aws/eks/detector_test.go | 2 +- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/processor/resourcedetectionprocessor/internal/aws/eks/detector.go b/processor/resourcedetectionprocessor/internal/aws/eks/detector.go index 5f9ad2d1fb2f..a84f4da2ac7c 100644 --- a/processor/resourcedetectionprocessor/internal/aws/eks/detector.go +++ b/processor/resourcedetectionprocessor/internal/aws/eks/detector.go @@ -54,6 +54,7 @@ type detector struct { utils detectorUtils logger *zap.Logger err error + ra metadata.ResourceAttributesConfig rb *metadata.ResourceBuilder } @@ -70,6 +71,7 @@ func NewDetector(set processor.CreateSettings, dcfg internal.DetectorConfig) (in utils: utils, logger: set.Logger, err: err, + ra: cfg.ResourceAttributes, rb: metadata.NewResourceBuilder(cfg.ResourceAttributes), }, nil } @@ -89,8 +91,11 @@ func (d *detector) Detect(ctx context.Context) (resource pcommon.Resource, schem // The error is unhandled because we want to return successfully detected resources // regardless of an error. The caller will properly handle any error hit while getting // the cluster name. - clusterName, err := d.utils.getClusterName(ctx) - d.rb.SetK8sClusterName(clusterName) + if d.ra.K8sClusterName.Enabled { + var clusterName string + clusterName, err = d.utils.getClusterName(ctx) + d.rb.SetK8sClusterName(clusterName) + } return d.rb.Emit(), conventions.SchemaURL, err } diff --git a/processor/resourcedetectionprocessor/internal/aws/eks/detector_test.go b/processor/resourcedetectionprocessor/internal/aws/eks/detector_test.go index 30bdd77bae7b..06e44262e8e1 100644 --- a/processor/resourcedetectionprocessor/internal/aws/eks/detector_test.go +++ b/processor/resourcedetectionprocessor/internal/aws/eks/detector_test.go @@ -55,7 +55,7 @@ func TestEKS(t *testing.T) { t.Setenv("KUBERNETES_SERVICE_HOST", "localhost") detectorUtils.On("getConfigMap", authConfigmapNS, authConfigmapName).Return(map[string]string{conventions.AttributeK8SClusterName: clusterName}, nil) // Call EKS Resource detector to detect resources - eksResourceDetector := &detector{utils: detectorUtils, err: nil, rb: metadata.NewResourceBuilder(metadata.DefaultResourceAttributesConfig())} + eksResourceDetector := &detector{utils: detectorUtils, err: nil, ra: metadata.DefaultResourceAttributesConfig(), rb: metadata.NewResourceBuilder(metadata.DefaultResourceAttributesConfig())} res, _, err := eksResourceDetector.Detect(ctx) require.NoError(t, err) From 7f3394a91b7637abc73b7b5a3626de8c3e3455fa Mon Sep 17 00:00:00 2001 From: Curtis Robert Date: Tue, 31 Oct 2023 10:08:19 -0700 Subject: [PATCH 5/8] Add nil check for tag's key --- .../resourcedetectionprocessor/internal/aws/eks/detector.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/processor/resourcedetectionprocessor/internal/aws/eks/detector.go b/processor/resourcedetectionprocessor/internal/aws/eks/detector.go index a84f4da2ac7c..717c71d96801 100644 --- a/processor/resourcedetectionprocessor/internal/aws/eks/detector.go +++ b/processor/resourcedetectionprocessor/internal/aws/eks/detector.go @@ -176,6 +176,10 @@ func (e eksDetectorUtils) getClusterNameTagFromReservations(reservations []*ec2. for _, reservation := range reservations { for _, instance := range reservation.Instances { for _, tag := range instance.Tags { + if tag.Key == nil { + continue + } + if *tag.Key == clusterNameAwsEksTag || *tag.Key == clusterNameEksTag { return *tag.Value } else if strings.HasPrefix(*tag.Key, kubernetesClusterNameTag) { From 02e1d0b3b6ce56eaf784991225c0272374d4ff90 Mon Sep 17 00:00:00 2001 From: Curtis Robert Date: Thu, 9 Nov 2023 15:05:34 -0800 Subject: [PATCH 6/8] Handle error when getting k8s cluster name --- .../internal/aws/eks/detector.go | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/processor/resourcedetectionprocessor/internal/aws/eks/detector.go b/processor/resourcedetectionprocessor/internal/aws/eks/detector.go index 717c71d96801..f141e52f306a 100644 --- a/processor/resourcedetectionprocessor/internal/aws/eks/detector.go +++ b/processor/resourcedetectionprocessor/internal/aws/eks/detector.go @@ -88,13 +88,17 @@ func (d *detector) Detect(ctx context.Context) (resource pcommon.Resource, schem d.rb.SetCloudProvider(conventions.AttributeCloudProviderAWS) d.rb.SetCloudPlatform(conventions.AttributeCloudPlatformAWSEKS) - // The error is unhandled because we want to return successfully detected resources - // regardless of an error. The caller will properly handle any error hit while getting - // the cluster name. if d.ra.K8sClusterName.Enabled { var clusterName string clusterName, err = d.utils.getClusterName(ctx) - d.rb.SetK8sClusterName(clusterName) + if err != nil { + // We clear out the error here so that the correctly detected resources + // can still be used by the processor. + d.logger.Warn("Unable to get EKS cluster name", zap.Error(err)) + err = nil + } else { + d.rb.SetK8sClusterName(clusterName) + } } return d.rb.Emit(), conventions.SchemaURL, err From 2ddd0a07b4a2b534b10715e7f1454be76d359da7 Mon Sep 17 00:00:00 2001 From: Curtis Robert Date: Fri, 10 Nov 2023 09:30:02 -0800 Subject: [PATCH 7/8] Refactor around error handling and logging --- .../internal/aws/eks/detector.go | 35 +++++++++---------- .../internal/aws/eks/detector_test.go | 4 +-- 2 files changed, 19 insertions(+), 20 deletions(-) diff --git a/processor/resourcedetectionprocessor/internal/aws/eks/detector.go b/processor/resourcedetectionprocessor/internal/aws/eks/detector.go index f141e52f306a..d7eb6397f1bd 100644 --- a/processor/resourcedetectionprocessor/internal/aws/eks/detector.go +++ b/processor/resourcedetectionprocessor/internal/aws/eks/detector.go @@ -41,7 +41,7 @@ const ( type detectorUtils interface { getConfigMap(ctx context.Context, namespace string, name string) (map[string]string, error) - getClusterName(ctx context.Context) (string, error) + getClusterName(ctx context.Context, logger *zap.Logger) string getClusterNameTagFromReservations([]*ec2.Reservation) string } @@ -89,16 +89,8 @@ func (d *detector) Detect(ctx context.Context) (resource pcommon.Resource, schem d.rb.SetCloudPlatform(conventions.AttributeCloudPlatformAWSEKS) if d.ra.K8sClusterName.Enabled { - var clusterName string - clusterName, err = d.utils.getClusterName(ctx) - if err != nil { - // We clear out the error here so that the correctly detected resources - // can still be used by the processor. - d.logger.Warn("Unable to get EKS cluster name", zap.Error(err)) - err = nil - } else { - d.rb.SetK8sClusterName(clusterName) - } + clusterName := d.utils.getClusterName(ctx, d.logger) + d.rb.SetK8sClusterName(clusterName) } return d.rb.Emit(), conventions.SchemaURL, err @@ -142,22 +134,26 @@ func (e eksDetectorUtils) getConfigMap(ctx context.Context, namespace string, na return cm.Data, nil } -func (e eksDetectorUtils) getClusterName(ctx context.Context) (string, error) { +func (e eksDetectorUtils) getClusterName(ctx context.Context, logger *zap.Logger) string { + defaultErrorMessage := "Unable to get EKS cluster name" sess, err := session.NewSession() if err != nil { - return "", err + logger.Warn(defaultErrorMessage, zap.Error(err)) + return "" } ec2Svc := ec2metadata.New(sess) region, err := ec2Svc.Region() if err != nil { - return "", err + logger.Warn(defaultErrorMessage, zap.Error(err)) + return "" } svc := ec2.New(sess, aws.NewConfig().WithRegion(region)) instanceIdentityDocument, err := ec2Svc.GetInstanceIdentityDocumentWithContext(ctx) if err != nil { - return "", err + logger.Warn(defaultErrorMessage, zap.Error(err)) + return "" } instances, err := svc.DescribeInstances(&ec2.DescribeInstancesInput{ @@ -166,14 +162,17 @@ func (e eksDetectorUtils) getClusterName(ctx context.Context) (string, error) { }, }) if err != nil { - return "", err + logger.Warn(defaultErrorMessage, zap.Error(err)) + return "" } clusterName := e.getClusterNameTagFromReservations(instances.Reservations) if len(clusterName) == 0 { - return clusterName, fmt.Errorf("Failed to detect EKS cluster name. No tag for cluster name found on EC2 instance") + logger.Warn("Failed to detect EKS cluster name. No tag for cluster name found on EC2 instance") + return "" } - return clusterName, nil + + return clusterName } func (e eksDetectorUtils) getClusterNameTagFromReservations(reservations []*ec2.Reservation) string { diff --git a/processor/resourcedetectionprocessor/internal/aws/eks/detector_test.go b/processor/resourcedetectionprocessor/internal/aws/eks/detector_test.go index 06e44262e8e1..7f4b6625066a 100644 --- a/processor/resourcedetectionprocessor/internal/aws/eks/detector_test.go +++ b/processor/resourcedetectionprocessor/internal/aws/eks/detector_test.go @@ -31,9 +31,9 @@ func (detectorUtils *MockDetectorUtils) getConfigMap(_ context.Context, namespac return args.Get(0).(map[string]string), args.Error(1) } -func (detectorUtils *MockDetectorUtils) getClusterName(_ context.Context) (string, error) { +func (detectorUtils *MockDetectorUtils) getClusterName(_ context.Context, _ *zap.Logger) string { var reservations []*ec2.Reservation - return detectorUtils.getClusterNameTagFromReservations(reservations), nil + return detectorUtils.getClusterNameTagFromReservations(reservations) } func (detectorUtils *MockDetectorUtils) getClusterNameTagFromReservations(_ []*ec2.Reservation) string { From 6dbe6b003f3d4f4852ffbcdfc398d548d17f551c Mon Sep 17 00:00:00 2001 From: Curtis Robert Date: Fri, 10 Nov 2023 09:50:39 -0800 Subject: [PATCH 8/8] Update processor/resourcedetectionprocessor/internal/aws/eks/detector.go Co-authored-by: bryan-aguilar <46550959+bryan-aguilar@users.noreply.github.com> --- .../resourcedetectionprocessor/internal/aws/eks/detector.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/processor/resourcedetectionprocessor/internal/aws/eks/detector.go b/processor/resourcedetectionprocessor/internal/aws/eks/detector.go index d7eb6397f1bd..c287921e73ae 100644 --- a/processor/resourcedetectionprocessor/internal/aws/eks/detector.go +++ b/processor/resourcedetectionprocessor/internal/aws/eks/detector.go @@ -93,7 +93,7 @@ func (d *detector) Detect(ctx context.Context) (resource pcommon.Resource, schem d.rb.SetK8sClusterName(clusterName) } - return d.rb.Emit(), conventions.SchemaURL, err + return d.rb.Emit(), conventions.SchemaURL, nil } func isEKS(ctx context.Context, utils detectorUtils) (bool, error) {