Skip to content

Commit

Permalink
Merge branch 'main' into simont1/root-tag-e2e-test
Browse files Browse the repository at this point in the history
  • Loading branch information
knusbaum authored Apr 12, 2024
2 parents e1346a1 + 1f33b61 commit dc37d32
Show file tree
Hide file tree
Showing 51 changed files with 2,748 additions and 745 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/label-analysis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ jobs:
- name: Checkout repository
uses: actions/checkout@v4
- name: Install Python dependencies
run: pip install -r requirements.txt -r tasks/requirements.txt
run: pip install -r tasks/requirements.txt
- name: Auto assign team label
run: inv -e github.assign-team-label --pr-id='${{ github.event.pull_request.number }}'
fetch-labels:
Expand Down
6 changes: 1 addition & 5 deletions .gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -170,10 +170,6 @@ variables:
DATADOG_AGENT_ARMBUILDIMAGES: v31802788-2dee8fe9
DATADOG_AGENT_SYSPROBE_BUILDIMAGES_SUFFIX: ""
DATADOG_AGENT_SYSPROBE_BUILDIMAGES: v31802788-2dee8fe9
DATADOG_AGENT_KERNEL_MATRIX_TESTING_BUILDIMAGES_SUFFIX: ""
DATADOG_AGENT_KERNEL_MATRIX_TESTING_BUILDIMAGES: v31802788-2dee8fe9
DATADOG_AGENT_NIKOS_BUILDIMAGES_SUFFIX: ""
DATADOG_AGENT_NIKOS_BUILDIMAGES: v31802788-2dee8fe9
DATADOG_AGENT_BTF_GEN_BUILDIMAGES_SUFFIX: ""
DATADOG_AGENT_BTF_GEN_BUILDIMAGES: v31802788-2dee8fe9
DATADOG_AGENT_BUILDERS: v28719426-b6a4fd9
Expand Down Expand Up @@ -367,7 +363,7 @@ variables:
- .gitlab/container_build/fakeintake.yml
- .gitlab/dev_container_deploy/fakeintake.yml
compare_to: main # TODO: use a variable, when this is supported https://gitlab.com/gitlab-org/gitlab/-/issues/369916

#
# Workflow rules
# Rules used to define whether a pipeline should run, and with which variables
Expand Down
1 change: 1 addition & 0 deletions .gitlab/benchmarks/benchmarks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ benchmark:
tags: ["team:apm-k8s-tweaked-metal-datadog-agent", "specific:true"]
script:
- export ARTIFACTS_DIR="$(pwd)/artifacts" && mkdir -p $ARTIFACTS_DIR
- pip install -r requirements.txt
- ./test/benchmarks/apm_scripts/capture-hardware-software-info.sh
- ./test/benchmarks/apm_scripts/run-benchmarks.sh
- ./test/benchmarks/apm_scripts/analyze-results.sh
Expand Down
2 changes: 0 additions & 2 deletions .gitlab/check_merge/do_not_merge.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,6 @@ do-not-merge:
[ ! -z "$DATADOG_AGENT_WINBUILDIMAGES_SUFFIX" ] ||
[ ! -z "$DATADOG_AGENT_ARMBUILDIMAGES_SUFFIX" ] ||
[ ! -z "$DATADOG_AGENT_SYSPROBE_BUILDIMAGES_SUFFIX" ] ||
[ ! -z "$DATADOG_AGENT_KERNEL_MATRIX_TESTING_BUILDIMAGES_SUFFIX" ] ||
[ ! -z "$DATADOG_AGENT_NIKOS_BUILDIMAGES_SUFFIX" ] ||
[ ! -z "$DATADOG_AGENT_BTF_GEN_BUILDIMAGES_SUFFIX" ] ||
[ ! -z "$TEST_INFRA_DEFINITIONS_BUILDIMAGES_SUFFIX" ]; then
echo "Pull request uses non-empty BUILDIMAGES_SUFFIX variable"
Expand Down
13 changes: 0 additions & 13 deletions .gitlab/kitchen_deploy/kitchen_deploy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,21 +33,10 @@
- filename=$(ls datadog-signing-keys*.deb); mv $filename datadog-signing-keys_${DD_PIPELINE_ID}.deb
- popd

# Avoid simultaneous writes on the repo metadata file that made kitchen tests fail before
.deploy_deb_resource_group-a6: &deploy_deb_resource_group-a6
resource_group: deploy_deb_a6

.deploy_deb_resource_group-a7: &deploy_deb_resource_group-a7
resource_group: deploy_deb_a7

.deploy_deb_resource_group-i7: &deploy_deb_resource_group-i7
resource_group: deploy_deb_i7

.deploy_deb_testing-a6:
stage: kitchen_deploy
image: 486234852809.dkr.ecr.us-east-1.amazonaws.com/ci/datadog-agent-builders/gitlab_agent_deploy:$DATADOG_AGENT_BUILDERS
tags: ["arch:amd64"]
<<: *deploy_deb_resource_group-a6
variables:
DD_PIPELINE_ID: $CI_PIPELINE_ID-a6
before_script:
Expand All @@ -58,7 +47,6 @@
stage: kitchen_deploy
image: 486234852809.dkr.ecr.us-east-1.amazonaws.com/ci/datadog-agent-builders/gitlab_agent_deploy:$DATADOG_AGENT_BUILDERS
tags: ["arch:amd64"]
<<: *deploy_deb_resource_group-i7
variables:
DD_PIPELINE_ID: $CI_PIPELINE_ID-i7
before_script:
Expand Down Expand Up @@ -104,7 +92,6 @@ deploy_deb_testing-a6_arm64:
stage: kitchen_deploy
image: 486234852809.dkr.ecr.us-east-1.amazonaws.com/ci/datadog-agent-builders/gitlab_agent_deploy:$DATADOG_AGENT_BUILDERS
tags: ["arch:amd64"]
<<: *deploy_deb_resource_group-a7
variables:
DD_PIPELINE_ID: $CI_PIPELINE_ID-a7
before_script:
Expand Down
4 changes: 2 additions & 2 deletions .gitlab/source_test/linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ tests_rpm-x64-py2:
- !reference [.except_disable_unit_tests]
- !reference [.except_mergequeue]
- when: on_success
image: 486234852809.dkr.ecr.us-east-1.amazonaws.com/ci/datadog-agent-buildimages/rpm_x64_testing$DATADOG_AGENT_BUILDIMAGES_SUFFIX:$DATADOG_AGENT_BUILDIMAGES
image: 486234852809.dkr.ecr.us-east-1.amazonaws.com/ci/datadog-agent-buildimages/rpm_x64$DATADOG_AGENT_BUILDIMAGES_SUFFIX:$DATADOG_AGENT_BUILDIMAGES
tags: ["arch:amd64"]
variables:
PYTHON_RUNTIMES: '2'
Expand All @@ -168,7 +168,7 @@ tests_rpm-x64-py3:
extends:
- .rtloader_tests
- .linux_tests_with_upload
image: 486234852809.dkr.ecr.us-east-1.amazonaws.com/ci/datadog-agent-buildimages/rpm_x64_testing$DATADOG_AGENT_BUILDIMAGES_SUFFIX:$DATADOG_AGENT_BUILDIMAGES
image: 486234852809.dkr.ecr.us-east-1.amazonaws.com/ci/datadog-agent-buildimages/rpm_x64$DATADOG_AGENT_BUILDIMAGES_SUFFIX:$DATADOG_AGENT_BUILDIMAGES
tags: ["arch:amd64"]
variables:
PYTHON_RUNTIMES: '3'
Expand Down
6 changes: 5 additions & 1 deletion cmd/agent/subcommands/jmx/command.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ import (
"github.com/DataDog/datadog-agent/comp/core/log/logimpl"
"github.com/DataDog/datadog-agent/comp/core/secrets"
"github.com/DataDog/datadog-agent/comp/core/settings"
"github.com/DataDog/datadog-agent/comp/core/settings/settingsimpl"
"github.com/DataDog/datadog-agent/comp/core/status"
"github.com/DataDog/datadog-agent/comp/core/tagger"
"github.com/DataDog/datadog-agent/comp/core/workloadmeta"
Expand Down Expand Up @@ -141,6 +142,10 @@ func Commands(globalParams *command.GlobalParams) []*cobra.Command {
workloadmeta.Module(),
apiimpl.Module(),
authtokenimpl.Module(),
// The jmx command does not have settings that change at runtime;
// still, we need to pass it to ensure the API server is properly initialized
settingsimpl.Module(),
fx.Supply(settings.Settings{}),
// TODO(components): this is a temporary hack as the StartServer() method of the API package was previously called with nil arguments
// This highlights the fact that the API Server created by JMX (through ExecJmx... function) should be different from the ones created
// in others commands such as run.
Expand All @@ -153,7 +158,6 @@ func Commands(globalParams *command.GlobalParams) []*cobra.Command {
fx.Provide(func() inventoryagent.Component { return nil }),
fx.Provide(func() inventoryhost.Component { return nil }),
fx.Provide(func() demultiplexer.Component { return nil }),
fx.Provide(func() settings.Component { return nil }),
fx.Provide(func() inventorychecks.Component { return nil }),
fx.Provide(func() packagesigning.Component { return nil }),
fx.Provide(func() optional.Option[rcservice.Component] { return optional.NewNoneOption[rcservice.Component]() }),
Expand Down
2 changes: 1 addition & 1 deletion cmd/serverless/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -287,7 +287,7 @@ func runAgent() {
ExtraTags: serverlessDaemon.ExtraTags,
Demux: serverlessDaemon.MetricAgent.Demux,
ProcessTrace: ta.Process,
DetectLambdaLibrary: func() bool { return serverlessDaemon.LambdaLibraryDetected },
DetectLambdaLibrary: serverlessDaemon.IsLambdaLibraryDetected,
InferredSpansEnabled: inferredspan.IsInferredSpansEnabled(),
}

Expand Down
7 changes: 5 additions & 2 deletions pkg/cli/subcommands/check/command.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ import (
"github.com/DataDog/datadog-agent/comp/core/log/logimpl"
"github.com/DataDog/datadog-agent/comp/core/secrets"
"github.com/DataDog/datadog-agent/comp/core/settings"
"github.com/DataDog/datadog-agent/comp/core/settings/settingsimpl"
"github.com/DataDog/datadog-agent/comp/core/status"
"github.com/DataDog/datadog-agent/comp/core/status/statusimpl"
"github.com/DataDog/datadog-agent/comp/core/sysprobeconfig/sysprobeconfigimpl"
Expand Down Expand Up @@ -208,7 +209,10 @@ func MakeCommand(globalParamsGetter func() GlobalParams) *cobra.Command {
},
),
statusimpl.Module(),

// The check command does not have settings that change at runtime;
// still, we need to pass it to ensure the API server is properly initialized
settingsimpl.Module(),
fx.Supply(settings.Settings{}),
// TODO(components): this is a temporary hack as the StartServer() method of the API package was previously called with nil arguments
// This highlights the fact that the API Server created by JMX (through ExecJmx... function) should be different from the ones created
// in others commands such as run.
Expand All @@ -217,7 +221,6 @@ func MakeCommand(globalParamsGetter func() GlobalParams) *cobra.Command {
fx.Provide(func() replay.Component { return nil }),
fx.Provide(func() pidmap.Component { return nil }),
fx.Provide(func() serverdebug.Component { return nil }),
fx.Provide(func() settings.Component { return nil }),
fx.Provide(func() host.Component { return nil }),
fx.Provide(func() inventoryagent.Component { return nil }),
fx.Provide(func() inventoryhost.Component { return nil }),
Expand Down
8 changes: 1 addition & 7 deletions pkg/security/secl/compiler/eval/macro.go
Original file line number Diff line number Diff line change
Expand Up @@ -169,11 +169,5 @@ func (m *Macro) GetFields() []Field {

// GetFields - Returns all the Field that the MacroEvaluator handles
//
// NOTE(review): this span looks like a rendered diff whose +/- markers were stripped:
// the loop over m.fieldValues and the trailing `return m.fields` are presumably the
// removed and the added version of the body — confirm against the real file.
func (m *MacroEvaluator) GetFields() []Field {
// Collect every key of m.fieldValues into a slice sized to the map.
fields := make([]Field, len(m.fieldValues))
i := 0
for key := range m.fieldValues {
fields[i] = key
i++
}
return fields
// NOTE(review): unreachable as written, since the statement above already returns;
// presumably this is the replacement body introduced by the commit — confirm.
return m.fields
}
30 changes: 30 additions & 0 deletions pkg/serverless/daemon/daemon.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,15 @@ type Daemon struct {
// LambdaLibraryDetected represents whether the Datadog Lambda Library was detected in the environment
LambdaLibraryDetected bool

// LambdaLibraryStateLock guards reads and writes of LambdaLibraryDetected
LambdaLibraryStateLock sync.Mutex

// executionSpanIncomplete is true while the Lambda execution span is still open: it is set on a start-invocation hit and cleared on an end-invocation hit
executionSpanIncomplete bool

// ExecutionSpanStateLock guards reads and writes of executionSpanIncomplete
ExecutionSpanStateLock sync.Mutex

// runtimeStateMutex is used to ensure that modifying the state of the runtime is thread-safe
runtimeStateMutex sync.Mutex

Expand Down Expand Up @@ -435,3 +444,24 @@ func (d *Daemon) setTraceTags(tagMap map[string]string) bool {
}
return false
}

// IsLambdaLibraryDetected reports the current value of LambdaLibraryDetected.
// The flag is read while holding LambdaLibraryStateLock.
func (d *Daemon) IsLambdaLibraryDetected() bool {
	d.LambdaLibraryStateLock.Lock()
	detected := d.LambdaLibraryDetected
	d.LambdaLibraryStateLock.Unlock()
	return detected
}

// IsExecutionSpanIncomplete reports the current value of executionSpanIncomplete.
// The flag is read while holding ExecutionSpanStateLock.
func (d *Daemon) IsExecutionSpanIncomplete() bool {
	d.ExecutionSpanStateLock.Lock()
	incomplete := d.executionSpanIncomplete
	d.ExecutionSpanStateLock.Unlock()
	return incomplete
}

// SetExecutionSpanIncomplete stores spanIncomplete in executionSpanIncomplete.
// The write happens while holding ExecutionSpanStateLock.
func (d *Daemon) SetExecutionSpanIncomplete(spanIncomplete bool) {
	d.ExecutionSpanStateLock.Lock()
	d.executionSpanIncomplete = spanIncomplete
	d.ExecutionSpanStateLock.Unlock()
}
4 changes: 4 additions & 0 deletions pkg/serverless/daemon/routes.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ type Hello struct {
// ServeHTTP handles a hit on the Hello route: it logs the hit and marks the
// Lambda Library as detected on the daemon, holding LambdaLibraryStateLock
// for the write. Neither the request nor the response writer is used.
//
//nolint:revive // TODO(SERV) Fix revive linter
func (h *Hello) ServeHTTP(w http.ResponseWriter, r *http.Request) {
	log.Debug("Hit on the serverless.Hello route.")
	daemon := h.daemon
	daemon.LambdaLibraryStateLock.Lock()
	daemon.LambdaLibraryDetected = true
	daemon.LambdaLibraryStateLock.Unlock()
}

Expand Down Expand Up @@ -53,6 +55,7 @@ type StartInvocation struct {

func (s *StartInvocation) ServeHTTP(w http.ResponseWriter, r *http.Request) {
log.Debug("Hit on the serverless.StartInvocation route.")
s.daemon.SetExecutionSpanIncomplete(true)
startTime := time.Now()
reqBody, err := io.ReadAll(r.Body)
if err != nil {
Expand Down Expand Up @@ -86,6 +89,7 @@ type EndInvocation struct {

func (e *EndInvocation) ServeHTTP(w http.ResponseWriter, r *http.Request) {
log.Debug("Hit on the serverless.EndInvocation route.")
e.daemon.SetExecutionSpanIncomplete(false)
endTime := time.Now()
ecs := e.daemon.ExecutionContext.GetCurrentState()
coldStartTags := e.daemon.ExecutionContext.GetColdStartTagsForRequestID(ecs.LastRequestID)
Expand Down
54 changes: 54 additions & 0 deletions pkg/serverless/daemon/routes_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,30 @@ func TestTraceContext(t *testing.T) {
}
}

// TestHello checks that a POST to the /lambda/hello route switches the daemon's
// Lambda Library detection from false to true.
func TestHello(t *testing.T) {
	assert := assert.New(t)

	port := testutil.FreeTCPPort(t)
	d := StartDaemon(fmt.Sprintf("127.0.0.1:%d", port))
	// NOTE(review): presumably gives the daemon's HTTP server time to start listening — confirm.
	time.Sleep(100 * time.Millisecond)
	defer d.Stop()
	d.InvocationProcessor = &invocationlifecycle.LifecycleProcessor{
		ExtraTags:           d.ExtraTags,
		Demux:               nil,
		ProcessTrace:        nil,
		DetectLambdaLibrary: d.IsLambdaLibraryDetected,
	}
	client := &http.Client{}
	body := bytes.NewBuffer([]byte(`{}`))
	request, err := http.NewRequest(http.MethodPost, fmt.Sprintf("http://127.0.0.1:%d/lambda/hello", port), body)
	if !assert.Nil(err) {
		// Without a request there is nothing to send; stop instead of passing nil to client.Do.
		return
	}
	assert.False(d.IsLambdaLibraryDetected())
	response, err := client.Do(request)
	if !assert.Nil(err) {
		// response is nil when the call fails; closing its body would panic and
		// hide the assertion failure reported above.
		return
	}
	response.Body.Close()
	assert.True(d.IsLambdaLibraryDetected())
}

func TestStartEndInvocationSpanParenting(t *testing.T) {
port := testutil.FreeTCPPort(t)
d := StartDaemon(fmt.Sprintf("127.0.0.1:%d", port))
Expand Down Expand Up @@ -332,6 +356,36 @@ func TestStartEndInvocationSpanParenting(t *testing.T) {
}
}

// TestStartEndInvocationIsExecutionSpanIncomplete checks that a hit on the
// start-invocation route marks the execution span as incomplete and that a
// following hit on the end-invocation route clears that mark. It also checks
// that the mock lifecycle processor saw both the start and the end callbacks.
func TestStartEndInvocationIsExecutionSpanIncomplete(t *testing.T) {
	assert := assert.New(t)
	port := testutil.FreeTCPPort(t)
	d := StartDaemon(fmt.Sprintf("127.0.0.1:%d", port))
	// NOTE(review): presumably gives the daemon's HTTP server time to start listening — confirm.
	time.Sleep(100 * time.Millisecond)
	defer d.Stop()

	m := &mockLifecycleProcessor{}
	d.InvocationProcessor = m

	client := &http.Client{}
	body := bytes.NewBuffer([]byte(`{"key": "value"}`))
	startReq, err := http.NewRequest(http.MethodPost, fmt.Sprintf("http://127.0.0.1:%d/lambda/start-invocation", port), body)
	if !assert.Nil(err) {
		return
	}
	startResp, err := client.Do(startReq)
	if !assert.Nil(err) {
		// startResp is nil when the call fails; closing its body would panic and
		// hide the assertion failure reported above.
		return
	}
	startResp.Body.Close()
	assert.True(m.OnInvokeStartCalled)
	assert.True(d.IsExecutionSpanIncomplete())

	body = bytes.NewBuffer([]byte(`{}`))
	endReq, err := http.NewRequest(http.MethodPost, fmt.Sprintf("http://127.0.0.1:%d/lambda/end-invocation", port), body)
	if !assert.Nil(err) {
		return
	}
	endResp, err := client.Do(endReq)
	if !assert.Nil(err) {
		// Same reasoning as for the start request: endResp is nil on failure.
		return
	}
	endResp.Body.Close()
	assert.True(m.OnInvokeEndCalled)
	assert.False(d.IsExecutionSpanIncomplete())
}

// Helper function for reading test file
func getEventFromFile(filename string) string {
event, err := os.ReadFile("../trace/testdata/event_samples/" + filename)
Expand Down
1 change: 1 addition & 0 deletions pkg/serverless/invocationlifecycle/invocation_details.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ type InvocationStartDetails struct {
type InvocationEndDetails struct {
EndTime time.Time
IsError bool
IsTimeout bool
RequestID string
ResponseRawPayload []byte
ColdStart bool
Expand Down
53 changes: 31 additions & 22 deletions pkg/serverless/invocationlifecycle/lifecycle.go
Original file line number Diff line number Diff line change
Expand Up @@ -281,32 +281,14 @@ func (lp *LifecycleProcessor) OnInvokeEnd(endDetails *InvocationEndDetails) {
spans = append(spans, span)

if lp.InferredSpansEnabled {
log.Debug("[lifecycle] Attempting to complete the inferred span")
log.Debugf("[lifecycle] Inferred span context: %+v", lp.GetInferredSpan().Span)
if lp.GetInferredSpan().Span.Start != 0 {
span0, span1 := lp.requestHandler.inferredSpans[0], lp.requestHandler.inferredSpans[1]
if span1 != nil {
log.Debug("[lifecycle] Completing a secondary inferred span")
lp.setParentIDForMultipleInferredSpans()
span1.AddTagToInferredSpan("http.status_code", statusCode)
span1.AddTagToInferredSpan("peer.service", lp.GetServiceName())
span := lp.completeInferredSpan(span1, lp.getInferredSpanStart(), endDetails.IsError)
spans = append(spans, span)
log.Debug("[lifecycle] The secondary inferred span attributes are %v", lp.requestHandler.inferredSpans[1])
}
span0.AddTagToInferredSpan("http.status_code", statusCode)
span0.AddTagToInferredSpan("peer.service", lp.GetServiceName())
span := lp.completeInferredSpan(span0, endDetails.EndTime, endDetails.IsError)
spans = append(spans, span)
log.Debugf("[lifecycle] The inferred span attributes are: %v", lp.GetInferredSpan())
} else {
log.Debug("[lifecyle] Failed to complete inferred span due to a missing start time. Please check that the event payload was received with the appropriate data")
}
inferredSpans := lp.endInferredSpan(statusCode, endDetails.EndTime, endDetails.IsError)
spans = append(spans, inferredSpans...)
}
lp.processTrace(spans)
}

if endDetails.IsError {
// We don't submit an error metric on timeouts since it should have already been submitted when the Extension receives a SHUTDOWN event
if endDetails.IsError && !endDetails.IsTimeout {
serverlessMetrics.SendErrorsEnhancedMetric(
lp.ExtraTags.Tags, endDetails.EndTime, lp.Demux,
)
Expand Down Expand Up @@ -385,3 +367,30 @@ func (lp *LifecycleProcessor) setParentIDForMultipleInferredSpans() {
lp.requestHandler.inferredSpans[1].Span.ParentID = lp.requestHandler.inferredSpans[0].Span.ParentID
lp.requestHandler.inferredSpans[0].Span.ParentID = lp.requestHandler.inferredSpans[1].Span.SpanID
}

// endInferredSpan completes the inferred span(s) held by the request handler and
// returns them. It does not submit anything itself; submission is left to the caller.
//
// statusCode is set as the "http.status_code" tag on every completed span and
// isError is forwarded to completeInferredSpan for each of them. The primary
// inferred span is completed with endTime. When a secondary inferred span exists
// it is completed first, with lp.getInferredSpanStart() passed in place of endTime,
// and it precedes the primary span in the result.
//
// An empty slice is returned when the inferred span has no start time.
func (lp *LifecycleProcessor) endInferredSpan(statusCode string, endTime time.Time, isError bool) []*pb.Span {
	spans := make([]*pb.Span, 0, 2)
	log.Debug("[lifecycle] Attempting to complete the inferred span")
	log.Debugf("[lifecycle] Inferred span context: %+v", lp.GetInferredSpan().Span)

	if lp.GetInferredSpan().Span.Start == 0 {
		log.Debug("[lifecycle] Failed to complete inferred span due to a missing start time. Please check that the event payload was received with the appropriate data")
		return spans
	}

	span0, span1 := lp.requestHandler.inferredSpans[0], lp.requestHandler.inferredSpans[1]
	if span1 != nil {
		log.Debug("[lifecycle] Completing a secondary inferred span")
		// Re-parent the two inferred spans before completing either of them.
		lp.setParentIDForMultipleInferredSpans()
		span1.AddTagToInferredSpan("http.status_code", statusCode)
		span1.AddTagToInferredSpan("peer.service", lp.GetServiceName())
		spans = append(spans, lp.completeInferredSpan(span1, lp.getInferredSpanStart(), isError))
		// Debugf, not Debug: the message carries a %v verb that must be expanded.
		log.Debugf("[lifecycle] The secondary inferred span attributes are %v", lp.requestHandler.inferredSpans[1])
	}

	span0.AddTagToInferredSpan("http.status_code", statusCode)
	span0.AddTagToInferredSpan("peer.service", lp.GetServiceName())
	spans = append(spans, lp.completeInferredSpan(span0, endTime, isError))
	log.Debugf("[lifecycle] The inferred span attributes are: %v", lp.GetInferredSpan())
	return spans
}
Loading

0 comments on commit dc37d32

Please sign in to comment.