From 067da18dd6e3ae5deae510c4fe734a126f61c33d Mon Sep 17 00:00:00 2001 From: Kevin Fairise <132568982+KevinFairise2@users.noreply.github.com> Date: Fri, 6 Sep 2024 15:21:29 +0200 Subject: [PATCH] Add another OTel test for the ingestor to replace old Argo e2e test (#28702) Co-authored-by: liustanley --- .gitlab/e2e/e2e.yml | 1 - .gitlab/e2e_k8s/e2e_k8s.yml | 12 - test/e2e/argo-workflows/otlp-workflow.yaml | 156 ------------ .../argo-workflows/templates/otlp-test.yaml | 229 ------------------ test/new-e2e/tests/otel/otel_test.go | 1 + .../tests/otel/otlp-ingest/pipelines_test.go | 148 +++++++++++ 6 files changed, 149 insertions(+), 398 deletions(-) delete mode 100644 test/e2e/argo-workflows/otlp-workflow.yaml delete mode 100644 test/e2e/argo-workflows/templates/otlp-test.yaml create mode 100644 test/new-e2e/tests/otel/otlp-ingest/pipelines_test.go diff --git a/.gitlab/e2e/e2e.yml b/.gitlab/e2e/e2e.yml index efbed3a2b7422..785ff989ce1d1 100644 --- a/.gitlab/e2e/e2e.yml +++ b/.gitlab/e2e/e2e.yml @@ -1,7 +1,6 @@ --- # e2e stage # Contains test jobs based on the new-e2e tests framework - .new_e2e_template: stage: e2e image: 486234852809.dkr.ecr.us-east-1.amazonaws.com/ci/test-infra-definitions/runner$TEST_INFRA_DEFINITIONS_BUILDIMAGES_SUFFIX:$TEST_INFRA_DEFINITIONS_BUILDIMAGES diff --git a/.gitlab/e2e_k8s/e2e_k8s.yml b/.gitlab/e2e_k8s/e2e_k8s.yml index 2c3faca866165..db52467098c4a 100644 --- a/.gitlab/e2e_k8s/e2e_k8s.yml +++ b/.gitlab/e2e_k8s/e2e_k8s.yml @@ -68,15 +68,3 @@ k8s-e2e-cspm-main: retry: 1 variables: ARGO_WORKFLOW: cspm - -k8s-e2e-otlp-dev: - extends: .k8s_e2e_template_dev - rules: !reference [.on_dev_branch_manual] - variables: - ARGO_WORKFLOW: otlp - -k8s-e2e-otlp-main: - extends: .k8s_e2e_template_main - rules: !reference [.on_main] - variables: - ARGO_WORKFLOW: otlp diff --git a/test/e2e/argo-workflows/otlp-workflow.yaml b/test/e2e/argo-workflows/otlp-workflow.yaml deleted file mode 100644 index 9320d2ae9ad7c..0000000000000 --- a/test/e2e/argo-workflows/otlp-workflow.yaml +++ /dev/null @@ -1,156 +0,0 @@ -apiVersion: argoproj.io/v1alpha1 -kind: Workflow -metadata: - generateName: argo-datadog-agent- -spec: - entrypoint: main - onExit: exit-handler - arguments: - parameters: - - name: datadog-agent-image-repository - - name: datadog-agent-image-tag - - name: datadog-cluster-agent-image-repository - - name: datadog-cluster-agent-image-tag - - name: ci_commit_short_sha - - name: ci_pipeline_id - - name: ci_job_id - volumes: - - name: datadog-agent-volume - hostPath: - path: /host/datadog-agent - templates: - - name: main - inputs: - parameters: - - name: datadog-agent-image-repository - - name: datadog-agent-image-tag - - name: datadog-cluster-agent-image-repository - - name: datadog-cluster-agent-image-tag - - name: ci_commit_short_sha - - name: ci_pipeline_id - - name: ci_job_id - steps: - - - name: start-fake-datadog - templateRef: - name: fake-datadog - template: create - arguments: - parameters: - - name: namespace - value: "{{workflow.namespace}}" - - - name: start-otlp-test - templateRef: - name: otlp-test - template: create - arguments: - parameters: - - name: namespace - value: "{{workflow.namespace}}" - - - - name: fake-dd-reset - templateRef: - name: fake-datadog - template: reset - arguments: - parameters: - - name: namespace - value: "{{workflow.namespace}}" - - - - name: start-datadog-agent - templateRef: - name: datadog-agent - template: create - arguments: - parameters: - - name: namespace - value: "{{workflow.namespace}}" - - name: agent-image-repository - value: "{{inputs.parameters.datadog-agent-image-repository}}" - - name: agent-image-tag - value: "{{inputs.parameters.datadog-agent-image-tag}}" - - name: dd-url - value: "http://fake-datadog.{{workflow.namespace}}.svc.cluster.local" - - name: site - value: "" - - name: cluster-agent-image-repository - value: "{{inputs.parameters.datadog-cluster-agent-image-repository}}" - - name: cluster-agent-image-tag - value: "{{inputs.parameters.datadog-cluster-agent-image-tag}}" - - name: ci_commit_short_sha - value: "{{inputs.parameters.ci_commit_short_sha}}" - - name: ci_pipeline_id - value: "{{inputs.parameters.ci_pipeline_id}}" - - name: ci_job_id - value: "{{inputs.parameters.ci_job_id}}" - - name: remote_configuration_enabled - value: "false" - - name: networkmonitoring_enabled - value: "false" - - - - name: wait-datadog-agent - templateRef: - name: datadog-agent - template: wait - arguments: - parameters: - - name: namespace - value: "{{workflow.namespace}}" - - - - name: test-otlp - templateRef: - name: otlp-test - template: test - arguments: - parameters: - - name: namespace - value: "{{workflow.namespace}}" - - - name: exit-handler - steps: - - - name: delete - template: delete - when: "{{workflow.status}} == Succeeded" - - - name: diagnose - template: diagnose - when: "{{workflow.status}} != Succeeded" - - - name: delete - steps: - - - name: stop-datadog-agent - templateRef: - name: datadog-agent - template: delete - arguments: - parameters: - - name: namespace - value: "{{workflow.namespace}}" - - - name: stop-otlp-test - templateRef: - name: otlp-test - template: delete - arguments: - parameters: - - name: namespace - value: "{{workflow.namespace}}" - - - name: diagnose - steps: - - - name: diagnose-datadog-agent - templateRef: - name: datadog-agent - template: diagnose - arguments: - parameters: - - name: namespace - value: "{{workflow.namespace}}" - - name: diagnose-otlp-test - templateRef: - name: otlp-test - template: diagnose - arguments: - parameters: - - name: namespace - value: "{{workflow.namespace}}" diff --git a/test/e2e/argo-workflows/templates/otlp-test.yaml b/test/e2e/argo-workflows/templates/otlp-test.yaml deleted file mode 100644 index 9f9716ffd9fd2..0000000000000 --- a/test/e2e/argo-workflows/templates/otlp-test.yaml +++ /dev/null @@ -1,229 +0,0 @@ -apiVersion: argoproj.io/v1alpha1 -kind: WorkflowTemplate -metadata: - name: otlp-test -spec: - templates: - - name: create-sender-config - inputs: - parameters: - - name: namespace - resource: - action: apply - manifest: | - apiVersion: v1 - kind: ConfigMap - metadata: - name: sender-config - namespace: {{inputs.parameters.namespace}} - data: - sender-config: |+ - receivers: - file: - path: /etc/data/metrics.data - loop: - enabled: true - period: 10s - exporters: - otlp: - endpoint: ${DD_AGENT_OTLP_ENDPOINT} - tls: - insecure: true - service: - pipelines: - metrics: - receivers: [file] - exporters: [otlp] - - name: create-metrics-data - inputs: - parameters: - - name: namespace - resource: - action: apply - manifest: | - apiVersion: v1 - kind: ConfigMap - metadata: - name: metrics-data - namespace: {{inputs.parameters.namespace}} - data: - metrics-data: |+ - {"resourceMetrics":[{"resource":{"attributes":[{"key":"telemetry.sdk.language","value":{"stringValue":"go"}},{"key":"telemetry.sdk.name","value":{"stringValue":"opentelemetry"}},{"key":"telemetry.sdk.version","value":{"stringValue":"1.0.0"}}]},"instrumentationLibraryMetrics":[{"instrumentationLibrary":{"name":"test-meter"},"metrics":[{"name":"an_important_metric","description":"Measures the cumulative epicness of the app","sum":{"dataPoints":[{"attributes":[{"key":"labelA","value":{"stringValue":"chocolate"}},{"key":"labelB","value":{"stringValue":"raspberry"}},{"key":"labelC","value":{"stringValue":"vanilla"}}],"startTimeUnixNano":"1637674530222121000","timeUnixNano":"1637674532223257300","asDouble":14}],"aggregationTemporality":"AGGREGATION_TEMPORALITY_CUMULATIVE","isMonotonic":true}},{"name":"test2.sendtodev.histogram","description":"IO read bytes","histogram":{"dataPoints":[{"attributes":[{"key":"labelA","value":{"stringValue":"chocolate"}},{"key":"labelB","value":{"stringValue":"raspberry"}},{"key":"labelC","value":{"stringValue":"vanilla"}}],"startTimeUnixNano":"1637674530222121000","timeUnixNano":"1637674532223257300","count":"42","sum":1541400,"bucketCounts":["14","0","14","0","0","14","0","0","0","0","0","0"],"explicitBounds":[5000,10000,25000,50000,100000,250000,500000,1000000,2500000,5000000,10000000]}],"aggregationTemporality":"AGGREGATION_TEMPORALITY_CUMULATIVE"}}]}],"schemaUrl":"https://opentelemetry.io/schemas/v1.4.0"}]} - {"resourceMetrics":[{"resource":{"attributes":[{"key":"telemetry.sdk.language","value":{"stringValue":"go"}},{"key":"telemetry.sdk.name","value":{"stringValue":"opentelemetry"}},{"key":"telemetry.sdk.version","value":{"stringValue":"1.0.0"}}]},"instrumentationLibraryMetrics":[{"instrumentationLibrary":{"name":"test-meter"},"metrics":[{"name":"an_important_metric","description":"Measures the cumulative epicness of the app","sum":{"dataPoints":[{"attributes":[{"key":"labelA","value":{"stringValue":"chocolate"}},{"key":"labelB","value":{"stringValue":"raspberry"}},{"key":"labelC","value":{"stringValue":"vanilla"}}],"startTimeUnixNano":"1637674530222121000","timeUnixNano":"1637674534223387200","asDouble":27}],"aggregationTemporality":"AGGREGATION_TEMPORALITY_CUMULATIVE","isMonotonic":true}},{"name":"test2.sendtodev.histogram","description":"IO read bytes","histogram":{"dataPoints":[{"attributes":[{"key":"labelA","value":{"stringValue":"chocolate"}},{"key":"labelB","value":{"stringValue":"raspberry"}},{"key":"labelC","value":{"stringValue":"vanilla"}}],"startTimeUnixNano":"1637674530222121000","timeUnixNano":"1637674534223387200","count":"81","sum":2972700,"bucketCounts":["27","0","27","0","0","27","0","0","0","0","0","0"],"explicitBounds":[5000,10000,25000,50000,100000,250000,500000,1000000,2500000,5000000,10000000]}],"aggregationTemporality":"AGGREGATION_TEMPORALITY_CUMULATIVE"}}]}],"schemaUrl":"https://opentelemetry.io/schemas/v1.4.0"}]} - - name: create-deployment - inputs: - parameters: - - name: namespace - resource: - action: apply - manifest: | - apiVersion: apps/v1 - kind: Deployment - metadata: - name: otlp-sender - namespace: {{inputs.parameters.namespace}} - spec: - replicas: 1 - selector: - matchLabels: - app: otlp-sender - template: - metadata: - labels: - app: otlp-sender - spec: - containers: - - name: sender - image: datadog/docker-library:e2e-otlp-sender_latest - resources: - requests: - memory: "32Mi" - cpu: "100m" - limits: - memory: "32Mi" - cpu: "100m" - env: - - name: DD_AGENT_HOST - valueFrom: - fieldRef: - fieldPath: status.hostIP - - name: DD_AGENT_OTLP_ENDPOINT - value: http://$(DD_AGENT_HOST):4317 - volumeMounts: - - name: "sender-config" - mountPath: "/etc/otel" - - name: "metrics-data" - mountPath: "/etc/data" - volumes: - - name: "sender-config" - configMap: - name: "sender-config" - items: - - key: sender-config - path: config.yaml - - name: "metrics-data" - configMap: - name: "metrics-data" - items: - - key: metrics-data - path: metrics.data - - name: create - inputs: - parameters: - - name: namespace - steps: - - - name: sender-config - template: create-sender-config - arguments: - parameters: - - name: namespace - value: "{{inputs.parameters.namespace}}" - - name: metrics-data - template: create-metrics-data - arguments: - parameters: - - name: namespace - value: "{{inputs.parameters.namespace}}" - - name: deployment - template: create-deployment - arguments: - parameters: - - name: namespace - value: "{{inputs.parameters.namespace}}" - - - name: delete-deployment - inputs: - parameters: - - name: namespace - resource: - action: delete - manifest: | - apiVersion: apps/v1 - kind: Deployment - metadata: - name: otlp-sender - namespace: {{inputs.parameters.namespace}} - - name: delete-sender-config - inputs: - parameters: - - name: namespace - resource: - action: delete - manifest: | - apiVersion: v1 - kind: ConfigMap - metadata: - name: sender-config - namespace: {{inputs.parameters.namespace}} - - name: delete-metrics-data - inputs: - parameters: - - name: namespace - resource: - action: delete - manifest: | - apiVersion: v1 - kind: ConfigMap - metadata: - name: metrics-data - namespace: {{inputs.parameters.namespace}} - - name: delete - inputs: - parameters: - - name: namespace - steps: - - - name: deployment - template: delete-deployment - arguments: - parameters: - - name: namespace - value: "{{inputs.parameters.namespace}}" - - name: sender-config - template: delete-sender-config - arguments: - parameters: - - name: namespace - value: "{{inputs.parameters.namespace}}" - - name: metrics-data - template: delete-metrics-data - arguments: - parameters: - - name: namespace - value: "{{inputs.parameters.namespace}}" - - - name: test - inputs: - parameters: - - name: namespace - activeDeadlineSeconds: 300 - script: - image: mongo:4.4.1 - command: [mongo, "fake-datadog.{{inputs.parameters.namespace}}.svc.cluster.local/datadog"] - source: | - while (1) { - sleep(2000); - - // Gauges - var nb = db.series.find({metric: "an_important_metric"}).count(); - if (nb == 0) { - print("no 'an_important_metric' metric found"); - continue; - } - - print("All good"); - break; - } - - name: diagnose - inputs: - parameters: - - name: namespace - activeDeadlineSeconds: 300 - script: - image: alpine/k8s:1.27.1 - command: [sh] - source: | - set -euo pipefail - - kubectl --namespace {{inputs.parameters.namespace}} get pods -l app=otlp-sender -o custom-columns=name:metadata.name --no-headers | while read -r po; do - kubectl --namespace {{inputs.parameters.namespace}} logs $po -c sender || true - done diff --git a/test/new-e2e/tests/otel/otel_test.go b/test/new-e2e/tests/otel/otel_test.go index d67c79aefcd90..1a00bf90d9e51 100644 --- a/test/new-e2e/tests/otel/otel_test.go +++ b/test/new-e2e/tests/otel/otel_test.go @@ -38,6 +38,7 @@ type linuxTestSuite struct { var collectorConfig string func TestOTel(t *testing.T) { + t.Parallel() e2e.Run(t, &linuxTestSuite{}, e2e.WithProvisioner(awskubernetes.KindProvisioner(awskubernetes.WithAgentOptions(kubernetesagentparams.WithoutDualShipping(), kubernetesagentparams.WithOTelAgent(), kubernetesagentparams.WithOTelConfig(collectorConfig))))) } diff --git a/test/new-e2e/tests/otel/otlp-ingest/pipelines_test.go b/test/new-e2e/tests/otel/otlp-ingest/pipelines_test.go new file mode 100644 index 0000000000000..f15c74e858220 --- /dev/null +++ b/test/new-e2e/tests/otel/otlp-ingest/pipelines_test.go @@ -0,0 +1,148 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +// Package localkubernetes contains the provisioner for the local Kubernetes based environments + +package otel + +import ( + "context" + _ "embed" + "fmt" + "testing" + "time" + + "github.com/DataDog/test-infra-definitions/components/datadog/kubernetesagentparams" + "github.com/stretchr/testify/assert" + batchv1 "k8s.io/api/batch/v1" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + "github.com/DataDog/datadog-agent/test/fakeintake/aggregator" + fakeintake "github.com/DataDog/datadog-agent/test/fakeintake/client" + "github.com/DataDog/datadog-agent/test/new-e2e/pkg/e2e" + "github.com/DataDog/datadog-agent/test/new-e2e/pkg/environments" + awskubernetes "github.com/DataDog/datadog-agent/test/new-e2e/pkg/environments/aws/kubernetes" +) + +type otelIngestTestSuite struct { + e2e.BaseSuite[environments.Kubernetes] +} + +func TestOTelIngest(t *testing.T) { + values := ` +datadog: + otlp: + receiver: + protocols: + grpc: + enabled: true + logs: + enabled: true +` + t.Parallel() + e2e.Run(t, &otelIngestTestSuite{}, e2e.WithProvisioner(awskubernetes.KindProvisioner(awskubernetes.WithAgentOptions(kubernetesagentparams.WithoutDualShipping(), kubernetesagentparams.WithHelmValues(values))))) +} + +func (s *otelIngestTestSuite) TestOTLPTraces() { + ctx := context.Background() + s.Env().FakeIntake.Client().FlushServerAndResetAggregators() + service := "telemetrygen-job" + numTraces := 10 + + s.T().Log("Starting telemetrygen") + s.createTelemetrygenJob(ctx, "traces", []string{"--service", service, "--traces", fmt.Sprint(numTraces)}) + + s.T().Log("Waiting for traces") + s.EventuallyWithT(func(c *assert.CollectT) { + traces, err := s.Env().FakeIntake.Client().GetTraces() + assert.NoError(c, err) + assert.NotEmpty(c, traces) + trace := traces[0] + assert.Equal(c, "none", trace.Env) + assert.NotEmpty(c, trace.TracerPayloads) + tp := trace.TracerPayloads[0] + assert.NotEmpty(c, tp.Chunks) + assert.NotEmpty(c, tp.Chunks[0].Spans) + spans := tp.Chunks[0].Spans + for _, sp := range spans { + assert.Equal(c, service, sp.Service) + assert.Equal(c, "telemetrygen", sp.Meta["otel.library.name"]) + } + }, 2*time.Minute, 10*time.Second) +} + +func (s *otelIngestTestSuite) TestOTLPMetrics() { + ctx := context.Background() + s.Env().FakeIntake.Client().FlushServerAndResetAggregators() + service := "telemetrygen-job" + serviceAttribute := fmt.Sprintf("service.name=\"%v\"", service) + numMetrics := 10 + + s.T().Log("Starting telemetrygen") + s.createTelemetrygenJob(ctx, "metrics", []string{"--metrics", fmt.Sprint(numMetrics), "--otlp-attributes", serviceAttribute}) + + s.T().Log("Waiting for metrics") + s.EventuallyWithT(func(c *assert.CollectT) { + serviceTag := "service:" + service + metrics, err := s.Env().FakeIntake.Client().FilterMetrics("gen", fakeintake.WithTags[*aggregator.MetricSeries]([]string{serviceTag})) + assert.NoError(c, err) + assert.NotEmpty(c, metrics) + }, 2*time.Minute, 10*time.Second) +} + +func (s *otelIngestTestSuite) TestOTLPLogs() { + ctx := context.Background() + s.Env().FakeIntake.Client().FlushServerAndResetAggregators() + service := "telemetrygen-job" + serviceAttribute := fmt.Sprintf("service.name=\"%v\"", service) + numLogs := 10 + logBody := "telemetrygen log" + + s.T().Log("Starting telemetrygen") + s.createTelemetrygenJob(ctx, "logs", []string{"--logs", fmt.Sprint(numLogs), "--otlp-attributes", serviceAttribute, "--body", logBody}) + + s.T().Log("Waiting for logs") + s.EventuallyWithT(func(c *assert.CollectT) { + logs, err := s.Env().FakeIntake.Client().FilterLogs(service) + assert.NoError(c, err) + assert.NotEmpty(c, logs) + for _, log := range logs { + assert.Contains(c, log.Message, logBody) + } + }, 2*time.Minute, 10*time.Second) +} + +func (s *otelIngestTestSuite) createTelemetrygenJob(ctx context.Context, telemetry string, options []string) { + var ttlSecondsAfterFinished int32 = 600 //nolint:revive // We want to see this is explicitly set to 0 + var backOffLimit int32 = 4 + + otlpEndpoint := fmt.Sprintf("%v:4317", s.Env().Agent.LinuxNodeAgent.LabelSelectors["app"]) + jobSpec := &batchv1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("telemetrygen-job-%v", telemetry), + Namespace: "datadog", + }, + Spec: batchv1.JobSpec{ + TTLSecondsAfterFinished: &ttlSecondsAfterFinished, + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "telemetrygen-job", + Image: "ghcr.io/open-telemetry/opentelemetry-collector-contrib/telemetrygen:latest", + Command: append([]string{"/telemetrygen", telemetry, "--otlp-endpoint", otlpEndpoint, "--otlp-insecure"}, options...), + }, + }, + RestartPolicy: corev1.RestartPolicyNever, + }, + }, + BackoffLimit: &backOffLimit, + }, + } + + _, err := s.Env().KubernetesCluster.Client().BatchV1().Jobs("datadog").Create(ctx, jobSpec, metav1.CreateOptions{}) + assert.NoError(s.T(), err, "Could not properly start job") +}