Skip to content

Commit

Permalink
clean exit test all services (DataDog#31334)
Browse files Browse the repository at this point in the history
  • Loading branch information
jack0x2 authored Nov 25, 2024
1 parent a22a165 commit 33d1155
Show file tree
Hide file tree
Showing 8 changed files with 146 additions and 52 deletions.
22 changes: 7 additions & 15 deletions .gitlab/e2e/e2e.yml
Original file line number Diff line number Diff line change
Expand Up @@ -190,21 +190,13 @@ new-e2e-windows-service-test:
variables:
TARGETS: ./tests/windows/service-test
TEAM: windows-agent
EXTRA_PARAMS: --run TestServiceBehavior

# Temporary job for hunting a crash
new-e2e-windows-service-test-nofim:
extends: .new_e2e_template
needs:
- !reference [.needs_new_e2e_template]
- deploy_windows_testing-a7
rules:
- !reference [.on_windows_service_or_e2e_changes]
- !reference [.manual]
variables:
TARGETS: ./tests/windows/service-test
TEAM: windows-agent
EXTRA_PARAMS: --run TestNoFIMServiceBehavior
parallel:
matrix:
- EXTRA_PARAMS: --run TestServiceBehaviorAgentCommand
- EXTRA_PARAMS: --run TestServiceBehaviorPowerShell
- EXTRA_PARAMS: --run TestServiceBehaviorWhenDisabledSystemProbe
- EXTRA_PARAMS: --run TestServiceBehaviorWhenDisabledProcessAgent
- EXTRA_PARAMS: --run TestServiceBehaviorWhenDisabledTraceAgent

new-e2e-language-detection:
extends: .new_e2e_template_needs_deb_x64
Expand Down
6 changes: 6 additions & 0 deletions cmd/security-agent/main_windows.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ package main

import (
"context"
"errors"
"fmt"
"os"
"path"

Expand Down Expand Up @@ -94,6 +96,10 @@ func (s *service) Run(svcctx context.Context) error {

err := start.RunAgent(log, config, telemetry, statusComponent, settings, wmeta)
if err != nil {
if errors.Is(err, start.ErrAllComponentsDisabled) {
// If all components are disabled, we should exit cleanly
return fmt.Errorf("%w: %w", servicemain.ErrCleanStopAfterInit, err)
}
return err
}

Expand Down
7 changes: 4 additions & 3 deletions cmd/security-agent/subcommands/start/command.go
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ func start(log log.Component, config config.Component, _ secrets.Component, _ st
defer StopAgent(log)

err := RunAgent(log, config, telemetry, statusComponent, settings, wmeta)
if errors.Is(err, errAllComponentsDisabled) || errors.Is(err, errNoAPIKeyConfigured) {
if errors.Is(err, ErrAllComponentsDisabled) || errors.Is(err, errNoAPIKeyConfigured) {
return nil
}
if err != nil {
Expand Down Expand Up @@ -257,7 +257,8 @@ var (
expvarServer *http.Server
)

var errAllComponentsDisabled = errors.New("all security-agent component are disabled")
// ErrAllComponentsDisabled is returned when all components are disabled
var ErrAllComponentsDisabled = errors.New("all security-agent component are disabled")
var errNoAPIKeyConfigured = errors.New("no API key configured")

// RunAgent initialized resources and starts API server
Expand All @@ -274,7 +275,7 @@ func RunAgent(log log.Component, config config.Component, telemetry telemetry.Co
// to startup because of an error. Only applies on Debian 7.
time.Sleep(5 * time.Second)

return errAllComponentsDisabled
return ErrAllComponentsDisabled
}

if !config.IsSet("api_key") {
Expand Down
17 changes: 8 additions & 9 deletions pkg/util/winutil/servicemain/servicemain.go
Original file line number Diff line number Diff line change
Expand Up @@ -296,22 +296,21 @@ func (s *controlHandler) Execute(args []string, r <-chan svc.ChangeRequest, chan
defer close(done)
// Run the actual agent/service
err = s.service.Run(ctx)
if err != nil {
s.eventlog(messagestrings.MSG_SERVICE_FAILED, err.Error())
}
}()
select {
case <-done:
if err != nil {
case <-cleanExitCtx.Done():
err = errors.New("service did not cleanly shutdown in a timely manner, hard stopping service")
}
if err != nil {
if errors.Is(err, ErrCleanStopAfterInit) {
s.eventlog(messagestrings.MSG_AGENT_CLEAN_STOP_AFTER_INIT, err.Error())
} else {
s.eventlog(messagestrings.MSG_SERVICE_FAILED, err.Error())
// since exitGate is meant to avoid an error, if we are returning
// with an error then we can skip the exitGate.
return
}
case <-cleanExitCtx.Done():
s.eventlog(messagestrings.MSG_SERVICE_FAILED, "service did not cleanly shutdown in a timely manner, hard stopping service")
// since exitGate is meant to avoid an error, if we are returning
// with an error then we can skip the exitGate.
return
}
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# disable logs agent
logs_enabled: false

# disable process agent
process_config:
process_collection:
enabled: false
container_collection:
enabled: false
enabled: "disabled"
process_discovery:
enabled: false

# trace-agent is running by default
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# enable logs agent
logs_enabled: true

# enable process agent
process_config:
# live process
process_collection:
enabled: true

# trace-agent is running by default
apm_config:
enabled: false
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# enable security agent
runtime_security_config:
enabled: false
117 changes: 92 additions & 25 deletions test/new-e2e/tests/windows/service-test/startstop_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,12 @@ import (
//go:embed fixtures/datadog.yaml
var agentConfig string

//go:embed fixtures/datadog-pa-disabled.yaml
var agentConfigPADisabled string

//go:embed fixtures/datadog-ta-disabled.yaml
var agentConfigTADisabled string

//go:embed fixtures/system-probe.yaml
var systemProbeConfig string

Expand All @@ -45,22 +51,25 @@ var systemProbeDisabled string
//go:embed fixtures/security-agent.yaml
var securityAgentConfig string

//go:embed fixtures/security-agent-disabled.yaml
var securityAgentConfigDisabled string

// TestServiceBehaviorAgentCommandNoFIM tests the service behavior when controlled by Agent commands
func TestNoFIMServiceBehaviorAgentCommand(t *testing.T) {
s := &agentServiceCommandSuite{}
run(t, s, systemProbeNoFIMConfig)
run(t, s, systemProbeNoFIMConfig, agentConfig, securityAgentConfig)
}

// TestServiceBehaviorPowerShellNoFIM tests the service behavior when controlled by PowerShell commands
func TestNoFIMServiceBehaviorPowerShell(t *testing.T) {
s := &powerShellServiceCommandSuite{}
run(t, s, systemProbeNoFIMConfig)
run(t, s, systemProbeNoFIMConfig, agentConfig, securityAgentConfig)
}

// TestServiceBehaviorAgentCommand tests the service behavior when controlled by Agent commands
func TestServiceBehaviorAgentCommand(t *testing.T) {
s := &agentServiceCommandSuite{}
run(t, s, systemProbeConfig)
run(t, s, systemProbeConfig, agentConfig, securityAgentConfig)
}

type agentServiceCommandSuite struct {
Expand Down Expand Up @@ -96,7 +105,7 @@ func (s *agentServiceCommandSuite) SetupSuite() {
// TestServiceBehaviorAgentCommand tests the service behavior when controlled by PowerShell commands
func TestServiceBehaviorPowerShell(t *testing.T) {
s := &powerShellServiceCommandSuite{}
run(t, s, systemProbeConfig)
run(t, s, systemProbeConfig, agentConfig, securityAgentConfig)
}

type powerShellServiceCommandSuite struct {
Expand Down Expand Up @@ -222,29 +231,77 @@ func (s *powerShellServiceCommandSuite) TestHardExitEventLogEntry() {
}, 1*time.Minute, 1*time.Second, "should have hard exit messages in the event log")
}

type agentServiceDisabledSuite struct {
baseStartStopSuite
disabledServices []string
}

// TestServiceBehaviorWhenDisabled tests the service behavior when disabled in the configuration
func TestServiceBehaviorWhenDisabled(t *testing.T) {
s := &agentServiceDisabledSuite{}
run(t, s, systemProbeDisabled)
func TestServiceBehaviorWhenDisabledSystemProbe(t *testing.T) {
s := &agentServiceDisabledSystemProbeSuite{}
s.disabledServices = []string{
"datadog-security-agent",
"datadog-system-probe",
"ddnpm",
"ddprocmon",
}
run(t, s, systemProbeDisabled, agentConfig, securityAgentConfigDisabled)
}

type agentServiceDisabledSuite struct {
baseStartStopSuite
type agentServiceDisabledSystemProbeSuite struct {
agentServiceDisabledSuite
}

// TestServiceBehaviorWhenDisabledProcessAgent tests the service behavior when disabled in the configuration
func TestServiceBehaviorWhenDisabledProcessAgent(t *testing.T) {
s := &agentServiceDisabledProcessAgentSuite{}
s.disabledServices = []string{
"datadog-process-agent",
"datadog-security-agent",
"datadog-system-probe",
"ddnpm",
"ddprocmon",
}
run(t, s, systemProbeDisabled, agentConfigPADisabled, securityAgentConfigDisabled)
}

type agentServiceDisabledProcessAgentSuite struct {
agentServiceDisabledSuite
}

func TestServiceBehaviorWhenDisabledTraceAgent(t *testing.T) {
s := &agentServiceDisabledTraceAgentSuite{}
s.disabledServices = []string{
"datadog-trace-agent",
}
run(t, s, systemProbeConfig, agentConfigTADisabled, securityAgentConfig)
}

type agentServiceDisabledTraceAgentSuite struct {
agentServiceDisabledSuite
}

func (s *agentServiceDisabledSuite) SetupSuite() {
s.baseStartStopSuite.SetupSuite()

// set up the expected services before calling the base setup
s.runningUserServices = func() []string {
return []string{
"datadogagent",
"datadog-trace-agent",
"datadog-process-agent",
runningServices := []string{}
for _, service := range s.getInstalledUserServices() {
if !slices.Contains(s.disabledServices, service) {
runningServices = append(runningServices, service)
}
}
return runningServices
}
s.runningServices = func() []string {
return s.runningUserServices()
runningServices := []string{}
for _, service := range s.getInstalledServices() {
if !slices.Contains(s.disabledServices, service) {
runningServices = append(runningServices, service)
}
}
return runningServices
}

s.startAgentCommand = func(host *components.RemoteHost) error {
Expand All @@ -268,23 +325,29 @@ func (s *agentServiceDisabledSuite) SetupSuite() {
}

func (s *agentServiceDisabledSuite) TestStartingDisabledService() {
kernel := s.getInstalledKernelServices()
// check that the system probe is not running
s.assertServiceState("Stopped", "datadog-system-probe")
for _, service := range s.disabledServices {
s.assertServiceState("Stopped", service)

// try and start it and verify that it does correctly outputs to event log
err := windowsCommon.StartService(s.Env().RemoteHost, "datadog-system-probe")
s.Require().NoError(err, "should start datadog-system-probe")
// verify that we only try user services
if !slices.Contains(kernel, service) {
// try and start it and verify that it does correctly outputs to event log
err := windowsCommon.StartService(s.Env().RemoteHost, service)
s.Require().NoError(err, fmt.Sprintf("should start %s", service))

//verify that service returns to stopped state
s.assertServiceState("Stopped", "datadog-system-probe")
// verify that service returns to stopped state
s.assertServiceState("Stopped", service)
}
}

// Verify there are not errors in the event log
entries, err := s.getAgentEventLogErrorsAndWarnings()
s.Require().NoError(err, "should get errors and warnings from Application event log")
s.Require().Empty(entries, "should not have errors or warnings from agents in the event log")
}

func run[Env any](t *testing.T, s e2e.Suite[Env], systemProbeConfig string) {
func run[Env any](t *testing.T, s e2e.Suite[Env], systemProbeConfig string, agentConfig string, securityAgentConfig string) {
opts := []e2e.SuiteOption{e2e.WithProvisioner(awsHostWindows.ProvisionerNoFakeIntake(
awsHostWindows.WithAgentOptions(
agentparams.WithAgentConfig(agentConfig),
Expand Down Expand Up @@ -583,13 +646,17 @@ func (s *baseStartStopSuite) getInstalledUserServices() []string {
}
}

// expectedInstalledServices returns the list of services that should be installed by the agent
func (s *baseStartStopSuite) getInstalledServices() []string {
user := s.getInstalledUserServices()
kernel := []string{
func (s *baseStartStopSuite) getInstalledKernelServices() []string {
return []string{
"ddnpm",
"ddprocmon",
}
}

// expectedInstalledServices returns the list of services that should be installed by the agent
func (s *baseStartStopSuite) getInstalledServices() []string {
user := s.getInstalledUserServices()
kernel := s.getInstalledKernelServices()
return append(user, kernel...)
}

Expand Down

0 comments on commit 33d1155

Please sign in to comment.