diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 43f5a27ca96e6e..1fcde84566101d 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -84,9 +84,9 @@ stages: - dev_container_deploy - deploy_containers - deploy_packages - - trigger_release - deploy_cws_instrumentation - deploy_dca + - trigger_release - choco_build - choco_deploy - internal_image_deploy diff --git a/.gitlab/e2e_test_junit_upload.yml b/.gitlab/e2e_test_junit_upload.yml index a69971259719da..21ff6d37f54626 100644 --- a/.gitlab/e2e_test_junit_upload.yml +++ b/.gitlab/e2e_test_junit_upload.yml @@ -35,6 +35,10 @@ e2e_test_junit_upload: - new-e2e-agent-platform-install-script-centos-iot-agent-a7-x86_64 - new-e2e-agent-platform-install-script-centos-dogstatsd-a7-x86_64 - new-e2e-agent-platform-install-script-centos-a6-x86_64 + - new-e2e-agent-platform-install-script-centos-fips-a6-x86_64 + - new-e2e-agent-platform-install-script-centos-fips-a7-x86_64 + - new-e2e-agent-platform-install-script-centos-fips-iot-agent-a7-x86_64 + - new-e2e-agent-platform-install-script-centos-fips-dogstatsd-a7-x86_64 - new-e2e-npm-main - new-e2e-process-main - new-e2e-cws-main diff --git a/.gitlab/kitchen_testing/centos.yml b/.gitlab/kitchen_testing/centos.yml index dd4080bcd03780..a89eefef8dfb09 100644 --- a/.gitlab/kitchen_testing/centos.yml +++ b/.gitlab/kitchen_testing/centos.yml @@ -96,35 +96,6 @@ # Kitchen: final test matrix (tests * scenarios) # ---------------------------------------------- -# We do install_script, step_by_step, upgrade6, upgrade7 and upgrade7_iot -# with FIPS on for CentOS/RHEL 8, while we do these with FIPS off -# for CentOS/RHEL 6/7. - -kitchen_centos_fips_install_script_agent-a6: - extends: - - .kitchen_os_with_cws - - .kitchen_scenario_centos_with_fips_a6 - - .kitchen_test_install_script_agent - -kitchen_centos_fips_install_script_agent-a7: - # Run install script test on branches, on a reduced number of platforms - rules: - !reference [.on_default_kitchen_tests_a7] - extends: - - .kitchen_os_with_cws - - .kitchen_scenario_centos_with_fips_a7 - - .kitchen_test_install_script_agent - -kitchen_centos_fips_install_script_iot_agent-a7: - extends: - - .kitchen_scenario_centos_with_fips_a7 - - .kitchen_test_install_script_iot_agent - -kitchen_centos_fips_install_script_dogstatsd-a7: - extends: - - .kitchen_scenario_centos_with_fips_a7 - - .kitchen_test_install_script_dogstatsd - # We only want to run step-by-step tests on deploy pipelines, # which is why they have a different rule (if_deploy_6/7) diff --git a/.gitlab/kitchen_tests_upload.yml b/.gitlab/kitchen_tests_upload.yml index e855dc7cc2f837..089fc2f29f1f2c 100644 --- a/.gitlab/kitchen_tests_upload.yml +++ b/.gitlab/kitchen_tests_upload.yml @@ -7,10 +7,6 @@ kitchen_tests_upload_common: - !reference [.except_mergequeue] - when: always dependencies: - - kitchen_centos_fips_install_script_agent-a6 - - kitchen_centos_fips_install_script_agent-a7 - - kitchen_centos_fips_install_script_dogstatsd-a7 - - kitchen_centos_fips_install_script_iot_agent-a7 - kitchen_centos_fips_upgrade6_agent-a6 - kitchen_centos_fips_upgrade6_agent-a7 - kitchen_centos_fips_upgrade7_agent-a7 diff --git a/.gitlab/new-e2e_testing/centos.yml b/.gitlab/new-e2e_testing/centos.yml index 6431136a7143ce..f60ae1754ead1c 100644 --- a/.gitlab/new-e2e_testing/centos.yml +++ b/.gitlab/new-e2e_testing/centos.yml @@ -21,6 +21,22 @@ E2E_OVERRIDE_INSTANCE_TYPE: "t2.medium" # CentOS 6 does not support ENA, so we cannot use t3 instances needs: ["deploy_rpm_testing-a7_x64"] +.new-e2e_centos-fips_a7_x86_64: + variables: + E2E_ARCH: x86_64 + E2E_OSVERS: "rhel-86-fips" + E2E_CWS_SUPPORTED_OSVERS: "rhel-86-fips" + E2E_BRANCH_OSVERS: "rhel-86-fips" + needs: ["deploy_rpm_testing-a7_x64"] + +.new-e2e_centos-fips_a6_x86_64: + variables: + E2E_ARCH: x86_64 + E2E_OSVERS: "rhel-86-fips" + E2E_CWS_SUPPORTED_OSVERS: "rhel-86-fips" + E2E_BRANCH_OSVERS: "rhel-86-fips" + needs: ["deploy_rpm_testing-a7_x64"] + new-e2e-agent-platform-install-script-centos-a6-x86_64: stage: kitchen_testing extends: @@ -68,3 +84,51 @@ new-e2e-agent-platform-install-script-centos-dogstatsd-a7-x86_64: - .new-e2e_agent_a7 variables: FLAVOR: datadog-dogstatsd + +new-e2e-agent-platform-install-script-centos-fips-a6-x86_64: + stage: kitchen_testing + extends: + - .new_e2e_template + - .new-e2e_install_script + - .new-e2e_os_centos + - .new-e2e_centos-fips_a6_x86_64 + - .new-e2e_agent_a6 + variables: + FLAVOR: datadog-agent + +new-e2e-agent-platform-install-script-centos-fips-a7-x86_64: + stage: kitchen_testing + extends: + - .new_e2e_template + - .new-e2e_install_script + - .new-e2e_os_centos + - .new-e2e_centos-fips_a7_x86_64 + - .new-e2e_agent_a7 + rules: + !reference [.on_default_new-e2e_tests_a7] + variables: + FLAVOR: datadog-agent + +new-e2e-agent-platform-install-script-centos-fips-iot-agent-a7-x86_64: + stage: kitchen_testing + extends: + - .new_e2e_template + - .new-e2e_install_script + - .new-e2e_os_centos + - .new-e2e_centos-fips_a7_x86_64 + - .new-e2e_agent_a7 + rules: + !reference [.on_default_new-e2e_tests_a7] + variables: + FLAVOR: datadog-iot-agent + +new-e2e-agent-platform-install-script-centos-fips-dogstatsd-a7-x86_64: + stage: kitchen_testing + extends: + - .new_e2e_template + - .new-e2e_install_script + - .new-e2e_os_centos + - .new-e2e_centos-fips_a7_x86_64 + - .new-e2e_agent_a7 + variables: + FLAVOR: datadog-dogstatsd diff --git a/.gitlab/trigger_release.yml b/.gitlab/trigger_release.yml index 672e7a12e8f728..024b13e50c4c47 100644 --- a/.gitlab/trigger_release.yml +++ b/.gitlab/trigger_release.yml @@ -20,7 +20,8 @@ # when triggered with major version 7 - source /root/.bashrc - export RELEASE_VERSION=$(inv agent.version --major-version 7 --url-safe --omnibus-format)-1 - - inv pipeline.trigger-child-pipeline --no-follow --project-name "DataDog/agent-release-management" --git-ref "main" --variables "ACTION,AUTO_RELEASE,BUILD_PIPELINE_ID,RELEASE_PRODUCT,RELEASE_VERSION,TARGET_REPO,TARGET_REPO_BRANCH" + - export GITLAB_TOKEN=$(aws ssm get-parameter --region us-east-1 --name ci.datadog-agent.gitlab_pipelines_scheduler_token --with-decryption --query "Parameter.Value" --out text) + - inv pipeline.trigger-child-pipeline --project-name "DataDog/agent-release-management" --git-ref "main" --variables "ACTION,AUTO_RELEASE,BUILD_PIPELINE_ID,RELEASE_PRODUCT,RELEASE_VERSION,TARGET_REPO,TARGET_REPO_BRANCH" dependencies: [] trigger_auto_staging_release: diff --git a/cmd/system-probe/modules/compliance.go b/cmd/system-probe/modules/compliance.go index 17778c16279d67..ff6d7468fec8a4 100644 --- a/cmd/system-probe/modules/compliance.go +++ b/cmd/system-probe/modules/compliance.go @@ -67,7 +67,9 @@ func (m *complianceModule) Register(router *module.Router) error { func (m *complianceModule) handleError(writer http.ResponseWriter, request *http.Request, status int, err error) { _ = log.Errorf("module compliance: failed to properly handle %s request: %s", request.URL.Path, err) + writer.Header().Set("Content-Type", "text/plain") writer.WriteHeader(status) + writer.Write([]byte(err.Error())) } func (m *complianceModule) handleScanDBConfig(writer http.ResponseWriter, request *http.Request) { @@ -78,7 +80,7 @@ func (m *complianceModule) handleScanDBConfig(writer http.ResponseWriter, reques qs := request.URL.Query() pid, err := strconv.ParseInt(qs.Get("pid"), 10, 32) if err != nil { - m.handleError(writer, request, http.StatusBadRequest, fmt.Errorf("pid query paramater is not an integer: %w", err)) + m.handleError(writer, request, http.StatusBadRequest, fmt.Errorf("pid query parameter is not an integer: %w", err)) return } diff --git a/cmd/system-probe/modules/compliance_test.go b/cmd/system-probe/modules/compliance_test.go index 5c00cc7af68561..74752ebc9bd2fa 100644 --- a/cmd/system-probe/modules/compliance_test.go +++ b/cmd/system-probe/modules/compliance_test.go @@ -28,15 +28,15 @@ func TestComplianceModuleNoProcess(t *testing.T) { { url := "/dbconfig" statusCode, _, respBody := doDBConfigRequest(t, url) + require.Contains(t, string(respBody), "pid query parameter is not an integer") require.Equal(t, http.StatusBadRequest, statusCode) - require.Len(t, respBody, 0) } { url := "/dbconfig?pid=0" statusCode, _, respBody := doDBConfigRequest(t, url) + require.Contains(t, "resource not found for pid=0", string(respBody)) require.Equal(t, http.StatusNotFound, statusCode) - require.Len(t, respBody, 0) } } @@ -55,25 +55,29 @@ func TestComplianceCheckModuleWithProcess(t *testing.T) { if err := json.Unmarshal(respBody, &resource); err != nil { t.Fatal(err) } - require.Nil(t, resource) + require.Equal(t, "db_postgresql", resource.Type) + require.Equal(t, "postgres", resource.Config.ProcessName) + require.NotEmpty(t, resource.Config.ProcessUser) + require.Equal(t, filepath.Join(tmp, "postgresql.conf"), resource.Config.ConfigFilePath) + require.NotEmpty(t, resource.Config.ConfigFileUser) + require.NotEmpty(t, resource.Config.ConfigFileGroup) + require.Equal(t, uint32(0600), resource.Config.ConfigFileMode) + require.Equal(t, map[string]interface{}{"foo": "bar"}, resource.Config.ConfigData) } func launchFakeProcess(ctx context.Context, t *testing.T, tmp, procname string) int { - // creates a symlink to /usr/bin/sleep to be able to create a fake - // postgres process. - sleepPath, err := exec.LookPath("sleep") - if err != nil { - t.Skipf("could not find sleep util") - } - fakePgPath := filepath.Join(tmp, procname) - if err := os.Symlink(sleepPath, fakePgPath); err != nil { - t.Fatalf("could not create fake process symlink: %v", err) + fakePgBinPath := filepath.Join(tmp, "postgres") + fakePgConfPath := filepath.Join(tmp, "postgresql.conf") + + if err := os.WriteFile(fakePgBinPath, []byte("#!/bin/bash\nsleep 10"), 0700); err != nil { + t.Fatal(err) } - if err := os.Chmod(fakePgPath, 0700); err != nil { - t.Fatalf("could not chmod fake process symlink: %v", err) + + if err := os.WriteFile(fakePgConfPath, []byte(`foo = 'bar'`), 0600); err != nil { + t.Fatal(err) } - cmd := exec.CommandContext(ctx, fakePgPath, "5") + cmd := exec.CommandContext(ctx, fakePgBinPath, fmt.Sprintf("--config-file=%s", fakePgConfPath)) if err := cmd.Start(); err != nil { t.Fatalf("could not start fake process %q: %v", procname, err) } diff --git a/docs/components/defining-components.md b/docs/components/defining-components.md index 9354290843238f..155523e2792791 100644 --- a/docs/components/defining-components.md +++ b/docs/components/defining-components.md @@ -1,45 +1,36 @@ # Defining Components +You can use the invoke task `inv new-component comp//` to generate a scaffold for your new component. + +Below is a description of the different folders and files of your component. + +Every public variable, function, struct, and interface of your component **must** be documented. Please refer to the [Documentation](#documentation) section below for details. + A component is defined in a dedicated package named `comp//`, where `` names the bundle that contains the component. The package must have the following defined in: - * `component.go` - * Extensive package-level documentation. - This should define, as precisely as possible, the behavior of the component, acting as a contract on which users of the component may depend. - See the "Documentation" section below for details. - + * `comp///component.go` * A team-name comment of the form `// team: `. This is used to generate CODEOWNERS information. - * `Component` -- the interface type implemented by the component. - This is the type by which other components will require this one via `fx`. + * `Component` -- The component interface. + This is the interface that other components can reference when declaring the component as a dependency via `fx`. It can be an empty interface, if there is no need for any methods. - It should have a formulaic doc string like `// Component is the component type.`, deferring documentation to the package docs. - All interface methods should be exported and thoroughly documented. - * `impl/your_filename.go` - * `Module` -- an `fx.Option` that can be included in the bundle's `Module` or an `fx.App` to make this component available. The `Module` is defined in a separate package from the component. This allows a package using only the component interface to not have to import the dependencies of the implementation of this component. - To assist with debugging, use `fxutil.Component(options...)`. - This item should have a formulaic doc string like `// Module defines the fx options for this component.` + * `comp///impl/.go` + * `Module` -- an `fx.Option` that can be included in the bundle's `Module` or an `fx.App` to make this component available. + The `Module` is defined in a separate package from the component, allowing a package to import the interface without having to import the entire implementation. + To assist with debugging, declare your Module using `fxutil.Component(options...)`. Components should not be nested; that is, no component's Go path should be a prefix of another component's Go path. ## Implementation -The completed `component.go` looks like this: +The Component interface and the `Module` definition are implemented in the file `comp///impl/.go`. -```go -// Package foo ... (detailed doc comment for the component) -package config +The Module definition function **must** be private. -// team: some-team-name +The completed `comp///impl/.go` looks like this: -// Component is the component type. -type Component interface { - // Foo is ... (detailed doc comment) - Foo(key string) string -} -``` -The completed `impl/your_filename.go` looks like this: ```go package config @@ -49,95 +40,48 @@ func Module() fxutil.Module { fx.Provide(newFoo), ) } -``` - -The Component interface is implemented in the same file as the module definition by an unexported type with a sensible name such as `launcher` or `provider` or the classic `foo`. - -```go -package config - -// Module defines the fx options for this component. -func Module() fxutil.Module { - return fxutil.Component( - fx.Provide(newFoo), - ) -} -type foo { - foos []string +type foo struct { + foos []string } type dependencies struct { - fx.In + fx.In - Log log.Component - Config config.Component - // ... + Log log.Component + Config config.Component + // ... } func newFoo(deps dependencies) Component { ... } -// Foo implements Component#Foo. +// foo implements Component#Foo. func (f *foo) Foo(key string) string { ... } ``` -The constructor `newFoo` is an `fx` constructor, so it can refer to other types and expect them to be automatically supplied. -See [Using Components](./using.md) for details. +The constructor `newFoo` is an `fx` constructor. It can refer to other dependencies and expect them to be automatically supplied via `fx`. -The constructor can return either `Component`, if it is infallible, or `(Component, error)`, if it could fail. +See [Using Components](./usage.md) for more details. + +The constructor can return either a `Component`, if it is infallible, or `(Component, error)`, if it could fail. In the second form, a non-nil error will crash the agent at startup with a message containing the error. It is possible, and often necessary, to return multiple values. If the list of return values grows unwieldy, `fx.Out` can be used to create an output struct. The constructor may call methods on other components, as long as the called method's documentation indicates it is OK. -You can use the invoke task `inv new-component comp//` to generate a pre-filled `component.go` file for the given component. - -## Documentation - -The documentation (both package-level and method-level) should include everything a user of the component needs to know. -In particular, any assumptions that might lead to panics if violated by the user should be clearly documented. - -Detailed documentation of how to avoid bugs in using a component is an indicator of excessive complexity and should be treated as a bug. -Simplifying the usage will improve the robustness of the Agent. - -Documentation should include: - -* Precise information on when each method may be called. - Can methods be called concurrently? - Are some methods invalid before the component has started? - Such assumptions are difficult to verify, so where possible try to make every method callable concurrently, at all times. - -* Precise information about data ownership of passed values and returned values. - Users can assume that any mutable value returned by a component will not be modified by the user or the component after it is returned. - Similarly, any mutable value passed to a component will not be later modified either by the component or the caller. - Any deviation from these defaults should be clearly documented. - - _Note: It can be surprisingly hard to avoid mutating data -- for example, `append(..)` surprisingly mutates its first argument. - It is also hard to detect these bugs, as they are often intermittent, cause silent data corruption, or introduce rare data races. - Where performance is not an issue, prefer to copy mutable input and outputs to avoid any potential bugs._ - -* Precise information about goroutines and blocking. - Users can assume that methods do not block indefinitely, so blocking methods should be documented as such. - Methods that invoke callbacks should be clear about how the callback is invoked, and what it might do. - For example, document whether the callback can block, and whether it might be called concurrently with other code. - -* Precise information about channels. - Is the channel buffered? - What happens if the channel is not read from quickly enough, or if reading stops? - Can the channel be closed by the sender, and if so, what does that mean? - ## Testing Support To support testing, components can optionally provide a mock implementation, with the following in: - * `component_mock.go` + * `comp///component_mock.go` * `Mock` -- the type implemented by the mock version of the component. This should embed `pkg.Component`, and provide additional exported methods for manipulating the mock for use by other packages. - * `impl/your_filename_mock.go` + * `comp///impl/_mock.go` * `MockModule` -- an `fx.Option` that can be included in a test `App` to get the component's mock implementation. -The `component_mock.go` looks like this: + +The `comp///component_mock.go` looks like this: ```go //go:build test @@ -145,15 +89,16 @@ package foo // Mock implements mock-specific methods. type Mock interface { - // Component methods are included in Mock. - Component + // Component methods are included in Mock. + Component - // AddedFoos returns the foos added by AddFoo calls on the mock implementation. - AddedFoos() []Foo + // AddedFoos returns the foos added by AddFoo calls on the mock implementation. + AddedFoos() []Foo } ``` -The `impl/your_filename_mock.go` looks like this: +The `comp///impl/_mock.go` looks like this: + ```go //go:build test @@ -162,7 +107,7 @@ package foo // MockModule defines the fx options for the mock component. func MockModule() fxutil.Module { return fxutil.Component( - fx.Provide(newMockFoo), + fx.Provide(newMock), ) } ``` @@ -176,9 +121,44 @@ func (m *mock) Foo(key string) string { ... } // AddedFoos implements Mock#AddedFoos. func (m *mock) AddedFoos() []Foo { ... } -func newFoo(deps dependencies) Component { - return &mock{ ... } +func newMock(deps dependencies) Component { + return &mock{ ... } } ``` -Users of the mock module can cast the `Component` to a `Mock` to access the mock methods, as described in [Using Components](./using.md). \ No newline at end of file +Users of the mock module can cast the `Component` to a `Mock` to access the mock methods, as described in [Using Components](./usage.md). + + +## Documentation + +The documentation (both package-level and method-level) should include everything a user of the component needs to know. +In particular, any assumptions that might lead to panics if violated by the user should be documented. + +Detailed documentation of how to avoid bugs in using a component is an indicator of excessive complexity and should be treated as a bug. +Simplifying the usage will improve the robustness of the Agent. + +Documentation should include: + +* Precise information on when each method may be called. + Can methods be called concurrently? + Are some methods invalid before the component has started? + Such assumptions are difficult to verify. Where possible, try to make every method callable concurrently, at all times. + +* Precise information about data ownership of passed values and returned values. + Users can assume that any mutable value returned by a component will not be modified by the user or the component after it is returned. + Similarly, any mutable value passed to a component will not be later modified either by the component or the caller. + Any deviation from these defaults should be documented. + + _Note: It can be surprisingly hard to avoid mutating data -- for example, `append(..)` surprisingly mutates its first argument. + It is also hard to detect these bugs, as they are often intermittent, cause silent data corruption, or introduce rare data races. + Where performance is not an issue, prefer to copy mutable input and outputs to avoid any potential bugs._ + +* Precise information about goroutines and blocking. + Users can assume that methods do not block indefinitely, so blocking methods should be documented as such. + Methods that invoke callbacks should be clear about how the callback is invoked, and what it might do. + For example, document whether the callback can block, and whether it might be called concurrently with other code. + +* Precise information about channels. + Is the channel buffered? + What happens if the channel is not read from quickly enough, or if reading stops? + Can the channel be closed by the sender, and if so, what does that mean? diff --git a/docs/components/using.md b/docs/components/usage.md similarity index 100% rename from docs/components/using.md rename to docs/components/usage.md diff --git a/pkg/autodiscovery/common/types/prometheus.go b/pkg/autodiscovery/common/types/prometheus.go index d96506ff2a51aa..cc8c24b90772fc 100644 --- a/pkg/autodiscovery/common/types/prometheus.go +++ b/pkg/autodiscovery/common/types/prometheus.go @@ -148,6 +148,10 @@ type InclExcl struct { // PrometheusScrapeChecksTransformer unmarshals a prometheus check. func PrometheusScrapeChecksTransformer(in string) ([]*PrometheusCheck, error) { + if in == "" { + return nil, nil + } + var promChecks []*PrometheusCheck if err := json.Unmarshal([]byte(in), &promChecks); err != nil { return promChecks, fmt.Errorf(`"prometheus_scrape.checks" can not be parsed: %v`, err) diff --git a/pkg/collector/corechecks/snmp/internal/report/interface_bandwidth_usage_rate.go b/pkg/collector/corechecks/snmp/internal/report/interface_bandwidth_usage_rate.go index 2371ad9c9d2c1e..787940e9c112aa 100644 --- a/pkg/collector/corechecks/snmp/internal/report/interface_bandwidth_usage_rate.go +++ b/pkg/collector/corechecks/snmp/internal/report/interface_bandwidth_usage_rate.go @@ -74,5 +74,5 @@ func (ibs InterfaceBandwidthState) calculateBandwidthUsageRate(fullIndex string, if ok { return 0, fmt.Errorf("ifSpeed changed from %d to %d for interface ID %s, no rate emitted", state.ifSpeed, ifSpeed, interfaceID) } - return 0, nil + return 0, fmt.Errorf("new entry made, no rate emitted for interface ID %s", interfaceID) } diff --git a/pkg/collector/corechecks/snmp/internal/report/interface_bandwidth_usage_rate_test.go b/pkg/collector/corechecks/snmp/internal/report/interface_bandwidth_usage_rate_test.go index 62f428f10a4904..d54fe0c23f874d 100644 --- a/pkg/collector/corechecks/snmp/internal/report/interface_bandwidth_usage_rate_test.go +++ b/pkg/collector/corechecks/snmp/internal/report/interface_bandwidth_usage_rate_test.go @@ -172,9 +172,10 @@ func Test_interfaceBandwidthState_calculateBandwidthUsageRate_errors(t *testing. expectedError error usageValue float64 newIfSpeed uint64 + ibs InterfaceBandwidthState }{ { - name: "snmp.ifBandwidthInUsage.Rate ifHCInOctets erred", + name: "snmp.ifBandwidthInUsage.Rate ifHCInOctets erred when interface speed changes", symbols: []profiledefinition.SymbolConfig{{OID: "1.3.6.1.2.1.31.1.1.1.6", Name: "ifHCInOctets"}}, fullIndex: "9", tags: []string{"abc"}, @@ -204,9 +205,10 @@ func Test_interfaceBandwidthState_calculateBandwidthUsageRate_errors(t *testing. // ((5000000 * 8) / (100 * 1000000)) * 100 = 40.0 usageValue: 40, newIfSpeed: uint64(100) * (1e6), + ibs: interfaceRateMapWithPrevious(), }, { - name: "snmp.ifBandwidthOutUsage.Rate ifHCOutOctets erred", + name: "snmp.ifBandwidthOutUsage.Rate ifHCOutOctets erred when interface speed changes", symbols: []profiledefinition.SymbolConfig{{OID: "1.3.6.1.2.1.31.1.1.1.10", Name: "ifHCOutOctets"}}, fullIndex: "9", values: &valuestore.ResultValueStore{ @@ -235,6 +237,7 @@ func Test_interfaceBandwidthState_calculateBandwidthUsageRate_errors(t *testing. // ((1000000 * 8) / (100 * 1000000)) * 100 = 8.0 usageValue: 8, newIfSpeed: uint64(100) * (1e6), + ibs: interfaceRateMapWithPrevious(), }, { name: "snmp.ifBandwidthInUsage.Rate ifHCInOctets error on negative rate", @@ -268,6 +271,40 @@ func Test_interfaceBandwidthState_calculateBandwidthUsageRate_errors(t *testing. usageValue: 4, // keep it the same interface speed, testing if the rate is negative only newIfSpeed: uint64(80) * (1e6), + ibs: interfaceRateMapWithPrevious(), + }, + { + name: "snmp.ifBandwidthInUsage.Rate ifHCInOctets error if new entry (to not send sample)", + symbols: []profiledefinition.SymbolConfig{{OID: "1.3.6.1.2.1.31.1.1.1.6", Name: "ifHCInOctets"}}, + fullIndex: "9", + tags: []string{"abc"}, + values: &valuestore.ResultValueStore{ + ColumnValues: valuestore.ColumnResultValuesType{ + // ifHCInOctets + "1.3.6.1.2.1.31.1.1.1.6": map[string]valuestore.ResultValue{ + "9": { + Value: 5000000.0, + }, + }, + // ifHCOutOctets + "1.3.6.1.2.1.31.1.1.1.10": map[string]valuestore.ResultValue{ + "9": { + Value: 1000000.0, + }, + }, + // ifHighSpeed + "1.3.6.1.2.1.31.1.1.1.15": map[string]valuestore.ResultValue{ + "9": { + Value: 100.0, + }, + }, + }, + }, + expectedError: fmt.Errorf("new entry made, no rate emitted for interface ID %s", "9.ifBandwidthInUsage"), + // ((5000000 * 8) / (100 * 1000000)) * 100 = 40.0 + usageValue: 40, + newIfSpeed: uint64(100) * (1e6), + ibs: MakeInterfaceBandwidthState(), }, } for _, tt := range tests { @@ -279,7 +316,7 @@ func Test_interfaceBandwidthState_calculateBandwidthUsageRate_errors(t *testing. ms := &MetricSender{ sender: sender, interfaceConfigs: tt.interfaceConfigs, - interfaceBandwidthState: interfaceRateMapWithPrevious(), + interfaceBandwidthState: tt.ibs, } for _, symbol := range tt.symbols { usageName := bandwidthMetricNameToUsage[symbol.Name] diff --git a/pkg/compliance/dbconfig/loader.go b/pkg/compliance/dbconfig/loader.go index c3fbe07cceb153..2ca5d47b1d9a04 100644 --- a/pkg/compliance/dbconfig/loader.go +++ b/pkg/compliance/dbconfig/loader.go @@ -89,11 +89,7 @@ func ListProcesses(ctx context.Context) map[utils.ContainerID]int32 { if !ok { continue } - - containerID, ok := utils.GetProcessContainerID(proc.Pid) - if !ok { - continue - } + containerID, _ := utils.GetProcessContainerID(proc.Pid) // We dedupe our scans based on the resource type and the container // ID, assuming that we will scan the same configuration for each // containers running the process. @@ -119,10 +115,7 @@ func LoadDBResourceFromPID(ctx context.Context, pid int32) (resource *DBResource if !ok { return } - containerID, ok := utils.GetProcessContainerID(pid) - if !ok { - return - } + containerID, _ := utils.GetProcessContainerID(pid) hostroot, ok := utils.GetProcessRootPath(pid) if !ok { return diff --git a/pkg/config/model/viper.go b/pkg/config/model/viper.go index ed912d79e659e2..ed163c0ae64fd5 100644 --- a/pkg/config/model/viper.go +++ b/pkg/config/model/viper.go @@ -14,10 +14,11 @@ import ( "sync" "time" - "github.com/DataDog/datadog-agent/pkg/util/log" "github.com/DataDog/viper" "github.com/spf13/afero" "github.com/spf13/pflag" + + "github.com/DataDog/datadog-agent/pkg/util/log" ) // Source stores what edits a setting as a string @@ -84,8 +85,7 @@ func (c *safeConfig) Set(key string, value interface{}, source Source) { c.mergeViperInstances(key) } -// SetWithoutSource wraps Viper for concurrent access. -// It's the default Viper Set() method. +// SetWithoutSource sets the given value using source Unknown func (c *safeConfig) SetWithoutSource(key string, value interface{}) { c.Set(key, value, SourceUnknown) } @@ -126,12 +126,9 @@ func (c *safeConfig) SetKnown(key string) { c.Viper.SetKnown(key) } -// IsKnown adds a key to the set of known valid config keys +// IsKnown returns whether a key is known func (c *safeConfig) IsKnown(key string) bool { - c.Lock() - defer c.Unlock() - - keys := c.Viper.GetKnownKeys() + keys := c.GetKnownKeys() _, ok := keys[key] return ok } @@ -139,8 +136,8 @@ func (c *safeConfig) IsKnown(key string) bool { // GetKnownKeys returns all the keys that meet at least one of these criteria: // 1) have a default, 2) have an environment variable binded or 3) have been SetKnown() func (c *safeConfig) GetKnownKeys() map[string]interface{} { - c.Lock() - defer c.Unlock() + c.RLock() + defer c.RUnlock() // GetKnownKeys returns a fresh map, so the caller may do with it // as they please without holding the lock. @@ -195,7 +192,7 @@ func (c *safeConfig) IsSectionSet(section string) bool { } } - // Is none of the keys are set, the section is still considered as set + // If none of the keys are set, the section is still considered as set // if it has been explicitly set in the config. return c.IsSet(section) } @@ -219,6 +216,8 @@ func (c *safeConfig) Get(key string) interface{} { // GetAllSources returns the value of a key for each source func (c *safeConfig) GetAllSources(key string) []ValueWithSource { + c.RLock() + defer c.RUnlock() vals := make([]ValueWithSource, len(sources)) for i, source := range sources { vals[i] = ValueWithSource{ @@ -418,6 +417,7 @@ func (c *safeConfig) SetEnvPrefix(in string) { } // mergeWithEnvPrefix derives the environment variable that Viper will use for a given key. +// mergeWithEnvPrefix must be called while holding the config log (read or write). func (c *safeConfig) mergeWithEnvPrefix(key string) string { return strings.Join([]string{c.envPrefix, strings.ToUpper(key)}, "_") } @@ -450,8 +450,8 @@ func (c *safeConfig) BindEnv(input ...string) { // SetEnvKeyReplacer wraps Viper for concurrent access func (c *safeConfig) SetEnvKeyReplacer(r *strings.Replacer) { - c.RLock() - defer c.RUnlock() + c.Lock() + defer c.Unlock() c.configSources[SourceEnvVar].SetEnvKeyReplacer(r) c.Viper.SetEnvKeyReplacer(r) c.envKeyReplacer = r @@ -459,22 +459,22 @@ func (c *safeConfig) SetEnvKeyReplacer(r *strings.Replacer) { // UnmarshalKey wraps Viper for concurrent access func (c *safeConfig) UnmarshalKey(key string, rawVal interface{}, opts ...viper.DecoderConfigOption) error { - c.Lock() - defer c.Unlock() + c.RLock() + defer c.RUnlock() return c.Viper.UnmarshalKey(key, rawVal, opts...) } // Unmarshal wraps Viper for concurrent access func (c *safeConfig) Unmarshal(rawVal interface{}) error { - c.Lock() - defer c.Unlock() + c.RLock() + defer c.RUnlock() return c.Viper.Unmarshal(rawVal) } // UnmarshalExact wraps Viper for concurrent access func (c *safeConfig) UnmarshalExact(rawVal interface{}) error { - c.Lock() - defer c.Unlock() + c.RLock() + defer c.RUnlock() return c.Viper.UnmarshalExact(rawVal) } @@ -522,8 +522,8 @@ func (c *safeConfig) MergeConfigMap(cfg map[string]any) error { // AllSettings wraps Viper for concurrent access func (c *safeConfig) AllSettings() map[string]interface{} { - c.Lock() - defer c.Unlock() + c.RLock() + defer c.RUnlock() // AllSettings returns a fresh map, so the caller may do with it // as they please without holding the lock. @@ -532,8 +532,8 @@ func (c *safeConfig) AllSettings() map[string]interface{} { // AllSettingsWithoutDefault wraps Viper for concurrent access func (c *safeConfig) AllSettingsWithoutDefault() map[string]interface{} { - c.Lock() - defer c.Unlock() + c.RLock() + defer c.RUnlock() // AllSettingsWithoutDefault returns a fresh map, so the caller may do with it // as they please without holding the lock. @@ -542,8 +542,8 @@ func (c *safeConfig) AllSettingsWithoutDefault() map[string]interface{} { // AllFileSettingsWithoutDefault wraps Viper for concurrent access func (c *safeConfig) AllFileSettingsWithoutDefault() map[string]interface{} { - c.Lock() - defer c.Unlock() + c.RLock() + defer c.RUnlock() // AllFileSettingsWithoutDefault returns a fresh map, so the caller may do with it // as they please without holding the lock. @@ -552,8 +552,8 @@ func (c *safeConfig) AllFileSettingsWithoutDefault() map[string]interface{} { // AllEnvVarSettingsWithoutDefault wraps Viper for concurrent access func (c *safeConfig) AllEnvVarSettingsWithoutDefault() map[string]interface{} { - c.Lock() - defer c.Unlock() + c.RLock() + defer c.RUnlock() // AllEnvVarSettingsWithoutDefault returns a fresh map, so the caller may do with it // as they please without holding the lock. @@ -562,8 +562,8 @@ func (c *safeConfig) AllEnvVarSettingsWithoutDefault() map[string]interface{} { // AllAgentRuntimeSettingsWithoutDefault wraps Viper for concurrent access func (c *safeConfig) AllAgentRuntimeSettingsWithoutDefault() map[string]interface{} { - c.Lock() - defer c.Unlock() + c.RLock() + defer c.RUnlock() // AllAgentRuntimeSettingsWithoutDefault returns a fresh map, so the caller may do with it // as they please without holding the lock. @@ -572,8 +572,8 @@ func (c *safeConfig) AllAgentRuntimeSettingsWithoutDefault() map[string]interfac // AllRemoteSettingsWithoutDefault wraps Viper for concurrent access func (c *safeConfig) AllRemoteSettingsWithoutDefault() map[string]interface{} { - c.Lock() - defer c.Unlock() + c.RLock() + defer c.RUnlock() // AllRemoteSettingsWithoutDefault returns a fresh map, so the caller may do with it // as they please without holding the lock. @@ -582,8 +582,8 @@ func (c *safeConfig) AllRemoteSettingsWithoutDefault() map[string]interface{} { // AllCliSettingsWithoutDefault wraps Viper for concurrent access func (c *safeConfig) AllCliSettingsWithoutDefault() map[string]interface{} { - c.Lock() - defer c.Unlock() + c.RLock() + defer c.RUnlock() // AllCliSettingsWithoutDefault returns a fresh map, so the caller may do with it // as they please without holding the lock. @@ -622,8 +622,8 @@ func (c *safeConfig) SetConfigType(in string) { // ConfigFileUsed wraps Viper for concurrent access func (c *safeConfig) ConfigFileUsed() string { - c.Lock() - defer c.Unlock() + c.RLock() + defer c.RUnlock() return c.Viper.ConfigFileUsed() } @@ -645,6 +645,8 @@ func (c *safeConfig) BindPFlag(key string, flag *pflag.Flag) error { // GetEnvVars implements the Config interface func (c *safeConfig) GetEnvVars() []string { + c.RLock() + defer c.RUnlock() vars := make([]string, 0, len(c.configEnvVars)) for v := range c.configEnvVars { vars = append(vars, v) @@ -687,7 +689,7 @@ func NewConfig(name string, envPrefix string, envKeyReplacer *strings.Replacer) return &config } -// CopyConfig copies the internal config to the current config. This should only be used in tests as replacing +// CopyConfig copies the given config to the receiver config. This should only be used in tests as replacing // the global config reference is unsafe. func (c *safeConfig) CopyConfig(cfg Config) { c.Lock() diff --git a/pkg/ebpf/bytecode/runtime/.gitignore b/pkg/ebpf/bytecode/runtime/.gitignore new file mode 100644 index 00000000000000..a4383358ec72fb --- /dev/null +++ b/pkg/ebpf/bytecode/runtime/.gitignore @@ -0,0 +1 @@ +*.d diff --git a/pkg/ebpf/bytecode/runtime/integrity.go b/pkg/ebpf/bytecode/runtime/integrity.go index 60ea650c5d7942..ffb6fc6cfc79a1 100644 --- a/pkg/ebpf/bytecode/runtime/integrity.go +++ b/pkg/ebpf/bytecode/runtime/integrity.go @@ -46,14 +46,14 @@ func main() { log.Fatalf("unable to resolve path to %s: %s", args[1], err) } - err = genIntegrity(inputFile, outputFile, args[2]) + err = genIntegrity(root, inputFile, outputFile, args[2]) if err != nil { log.Fatalf("error generating integrity: %s", err) } fmt.Printf("successfully generated from %s => %s\n", inputFile, outputFile) } -func genIntegrity(inputFile, outputFile, pkg string) error { +func genIntegrity(root, inputFile, outputFile, pkg string) error { hash, err := hashFile(inputFile) if err != nil { return err @@ -69,6 +69,25 @@ func genIntegrity(inputFile, outputFile, pkg string) error { } defer f.Close() + depsFile := fmt.Sprintf("%s.d", outputFile) + odeps, err := os.Create(depsFile) + if err != nil { + return fmt.Errorf("error opening output deps file: %s", err) + } + defer odeps.Close() + + relIn, err := filepath.Rel(root, inputFile) + if err != nil { + return fmt.Errorf("error getting relative input path: %s", err) + } + relOut, err := filepath.Rel(root, outputFile) + if err != nil { + return fmt.Errorf("error getting relative output path: %s", err) + } + odeps.WriteString(fmt.Sprintf("%s: \\\n", relOut)) + odeps.WriteString(fmt.Sprintf(" %s", relIn)) + odeps.WriteString("\n") + base := filepath.Base(inputFile) caser := cases.Title(language.English, cases.NoLower) diff --git a/pkg/gohai/filesystem/filesystem_nix.go b/pkg/gohai/filesystem/filesystem_nix.go index 6f85acfa1ca850..aa36d39e7a2908 100644 --- a/pkg/gohai/filesystem/filesystem_nix.go +++ b/pkg/gohai/filesystem/filesystem_nix.go @@ -94,7 +94,7 @@ func getFileSystemInfoWithMounts(initialMounts []*mountinfo.Info, sizeKB, dev fs sizeKB, err := sizeKB(mount) if err != nil { - log.Info(err) + log.Debug(err) continue } @@ -111,7 +111,7 @@ func getFileSystemInfoWithMounts(initialMounts []*mountinfo.Info, sizeKB, dev fs dev, err := dev(mount) if err != nil { - log.Info(err) + log.Debug(err) continue } diff --git a/pkg/network/tracer/offsetguess/conntrack.go b/pkg/network/tracer/offsetguess/conntrack.go index 2a7da636738de9..9bc13098d27456 100644 --- a/pkg/network/tracer/offsetguess/conntrack.go +++ b/pkg/network/tracer/offsetguess/conntrack.go @@ -29,8 +29,15 @@ import ( "github.com/DataDog/datadog-agent/pkg/util/log" ) -// sizeof(struct nf_conntrack_tuple), see https://github.com/torvalds/linux/blob/master/include/net/netfilter/nf_conntrack_tuple.h -const sizeofNfConntrackTuple = 40 +const ( + // sizeof(struct nf_conntrack_tuple), see https://github.com/torvalds/linux/blob/master/include/net/netfilter/nf_conntrack_tuple.h + sizeofNfConntrackTuple = 40 + + // sizeof(struct nf_conntrack_tuple_hash), see https://github.com/torvalds/linux/blob/master/include/net/netfilter/nf_conntrack_tuple.h + sizeofNfConntrackTupleHash = 56 +) + +var localIPv4 = net.ParseIP("127.0.0.3") type conntrackOffsetGuesser struct { m *manager.Manager @@ -354,7 +361,18 @@ func (e *conntrackEventGenerator) Generate(status GuessWhat, expected *fieldValu } var err error err = kernel.WithNS(e.ns, func() error { - e.udpConn, err = net.DialTimeout("udp4", e.udpAddr, 500*time.Millisecond) + // we use a dialer instance to override the local + // address to use with the udp connection. this is + // because on kernel 4.4 using the default loopback + // (127.0.0.1) address sometimes results in an + // incorrect match for the source address, resulting + // in an incorrect offset for ct_origin + d := net.Dialer{ + Timeout: 500 * time.Millisecond, + LocalAddr: &net.UDPAddr{IP: localIPv4}, + } + + e.udpConn, err = d.Dial("udp4", e.udpAddr) if err != nil { return err } diff --git a/pkg/network/tracer/offsetguess/overlaps.go b/pkg/network/tracer/offsetguess/overlaps.go index 5cc058ddc66570..c29faed25f4530 100644 --- a/pkg/network/tracer/offsetguess/overlaps.go +++ b/pkg/network/tracer/offsetguess/overlaps.go @@ -121,7 +121,7 @@ func (c *conntrackOffsetGuesser) nfConnRanges() []offsetRange { }, GuessWhat(c.status.What)) return []offsetRange{ - {c.status.Offset_origin, sizeofNfConntrackTuple}, + {c.status.Offset_origin, sizeofNfConntrackTupleHash}, {c.status.Offset_reply, sizeofNfConntrackTuple}, {c.status.Offset_netns, uint64(unsafe.Sizeof(c.status.Netns))}, }[:idx] diff --git a/pkg/network/tracer/offsetguess/tracer.go b/pkg/network/tracer/offsetguess/tracer.go index de5408a674252b..8e2b9f06aafd86 100644 --- a/pkg/network/tracer/offsetguess/tracer.go +++ b/pkg/network/tracer/offsetguess/tracer.go @@ -37,11 +37,11 @@ import ( "github.com/DataDog/datadog-agent/pkg/util/native" ) -//nolint:revive // TODO(NET) Fix revive linter -const InterfaceLocalMulticastIPv6 = "ff01::1" -const listenIPv4 = "127.0.0.2" - const ( + // InterfaceLocalMulticastIPv6 is a destination IPv6 address used for offset guessing + InterfaceLocalMulticastIPv6 = "ff01::1" + listenIPv4 = "127.0.0.2" + tcpGetSockOptKProbeNotCalled uint64 = 0 tcpGetSockOptKProbeCalled uint64 = 1 ) diff --git a/pkg/security/probe/field_handlers_ebpfless.go b/pkg/security/probe/field_handlers_ebpfless.go index 2b873788152c5c..b1ccd556d359a7 100644 --- a/pkg/security/probe/field_handlers_ebpfless.go +++ b/pkg/security/probe/field_handlers_ebpfless.go @@ -40,7 +40,10 @@ func (fh *EBPFLessFieldHandlers) GetProcessService(ev *model.Event) string { // ResolveProcessCacheEntry queries the ProcessResolver to retrieve the ProcessContext of the event func (fh *EBPFLessFieldHandlers) ResolveProcessCacheEntry(ev *model.Event) (*model.ProcessCacheEntry, bool) { if ev.ProcessCacheEntry == nil && ev.PIDContext.Pid != 0 { - ev.ProcessCacheEntry = fh.resolvers.ProcessResolver.Resolve(ev.PIDContext.Pid) + ev.ProcessCacheEntry = fh.resolvers.ProcessResolver.Resolve(sprocess.CacheResolverKey{ + Pid: ev.PIDContext.Pid, + NSID: ev.NSID, + }) } if ev.ProcessCacheEntry == nil { @@ -130,7 +133,10 @@ func (fh *EBPFLessFieldHandlers) ResolveProcessEnvs(_ *model.Event, process *mod // GetProcessCacheEntry queries the ProcessResolver to retrieve the ProcessContext of the event func (fh *EBPFLessFieldHandlers) GetProcessCacheEntry(ev *model.Event) (*model.ProcessCacheEntry, bool) { - ev.ProcessCacheEntry = fh.resolvers.ProcessResolver.Resolve(ev.PIDContext.Pid) + ev.ProcessCacheEntry = fh.resolvers.ProcessResolver.Resolve(sprocess.CacheResolverKey{ + Pid: ev.PIDContext.Pid, + NSID: ev.NSID, + }) if ev.ProcessCacheEntry == nil { ev.ProcessCacheEntry = model.GetPlaceholderProcessCacheEntry(ev.PIDContext.Pid, ev.PIDContext.Pid, false) return ev.ProcessCacheEntry, false diff --git a/pkg/security/probe/probe_epbfless.go b/pkg/security/probe/probe_epbfless.go index 1089d30c198ba1..36ba163012be92 100644 --- a/pkg/security/probe/probe_epbfless.go +++ b/pkg/security/probe/probe_epbfless.go @@ -23,6 +23,7 @@ import ( "github.com/DataDog/datadog-agent/pkg/security/probe/kfilters" "github.com/DataDog/datadog-agent/pkg/security/proto/ebpfless" "github.com/DataDog/datadog-agent/pkg/security/resolvers" + "github.com/DataDog/datadog-agent/pkg/security/resolvers/process" "github.com/DataDog/datadog-agent/pkg/security/secl/compiler/eval" "github.com/DataDog/datadog-agent/pkg/security/secl/model" "github.com/DataDog/datadog-agent/pkg/security/secl/rules" @@ -67,15 +68,16 @@ func (p *EBPFLessProbe) handleSyscallMsg(syscallMsg *ebpfless.SyscallMsg) { p.seqNum++ event := p.zeroEvent() + event.NSID = syscallMsg.NSID switch syscallMsg.Type { case ebpfless.SyscallTypeExec: event.Type = uint32(model.ExecEventType) - entry := p.Resolvers.ProcessResolver.AddExecEntry(syscallMsg.PID, syscallMsg.Exec.Filename, syscallMsg.Exec.Args, syscallMsg.Exec.Envs, syscallMsg.ContainerContext.ID) + entry := p.Resolvers.ProcessResolver.AddExecEntry(process.CacheResolverKey{Pid: syscallMsg.PID, NSID: syscallMsg.NSID}, syscallMsg.Exec.Filename, syscallMsg.Exec.Args, syscallMsg.Exec.Envs, syscallMsg.ContainerContext.ID) event.Exec.Process = &entry.Process case ebpfless.SyscallTypeFork: event.Type = uint32(model.ForkEventType) - p.Resolvers.ProcessResolver.AddForkEntry(syscallMsg.PID, syscallMsg.Fork.PPID) + p.Resolvers.ProcessResolver.AddForkEntry(process.CacheResolverKey{Pid: syscallMsg.PID, NSID: syscallMsg.NSID}, syscallMsg.Fork.PPID) case ebpfless.SyscallTypeOpen: event.Type = uint32(model.FileOpenEventType) event.Open.File.PathnameStr = syscallMsg.Open.Filename @@ -93,7 +95,7 @@ func (p *EBPFLessProbe) handleSyscallMsg(syscallMsg *ebpfless.SyscallMsg) { } // use ProcessCacheEntry process context as process context - event.ProcessCacheEntry = p.Resolvers.ProcessResolver.Resolve(syscallMsg.PID) + event.ProcessCacheEntry = p.Resolvers.ProcessResolver.Resolve(process.CacheResolverKey{Pid: syscallMsg.PID, NSID: syscallMsg.NSID}) if event.ProcessCacheEntry == nil { event.ProcessCacheEntry = model.NewPlaceholderProcessCacheEntry(syscallMsg.PID, syscallMsg.PID, false) } diff --git a/pkg/security/proto/ebpfless/msg.go b/pkg/security/proto/ebpfless/msg.go index 2df97cd52c246a..c8a51ca0bc6114 100644 --- a/pkg/security/proto/ebpfless/msg.go +++ b/pkg/security/proto/ebpfless/msg.go @@ -75,6 +75,7 @@ type ChdirSyscallFakeMsg struct { // SyscallMsg defines a syscall message type SyscallMsg struct { SeqNum uint64 + NSID uint64 Type SyscallType PID uint32 ContainerContext *ContainerContext diff --git a/pkg/security/ptracer/cws.go b/pkg/security/ptracer/cws.go index 5771c6b10fb16e..ff2891fcea0531 100644 --- a/pkg/security/ptracer/cws.go +++ b/pkg/security/ptracer/cws.go @@ -271,7 +271,8 @@ func retrieveECSMetadata(ctx *ebpfless.ContainerContext) error { return err } - if data.DockerID != "" { + if data.DockerID != "" && ctx.ID == "" { + // only set the container ID if we previously failed to retrieve it from proc ctx.ID = data.DockerID } if data.DockerName != "" { @@ -355,6 +356,9 @@ func StartCWSPtracer(args []string, probeAddr string, verbose bool) error { } var containerCtx ebpfless.ContainerContext + if err := retrieveContainerIDFromProc(&containerCtx); err != nil { + logErrorf("Retrieve container ID from proc failed: %v\n", err) + } if err := retrieveECSMetadata(&containerCtx); err != nil { return err } @@ -370,6 +374,7 @@ func StartCWSPtracer(args []string, probeAddr string, verbose bool) error { return err } + nsid := getNSID() msgChan := make(chan *ebpfless.SyscallMsg, 10000) traceChan := make(chan bool) @@ -385,6 +390,7 @@ func StartCWSPtracer(args []string, probeAddr string, verbose bool) error { for msg := range msgChan { msg.SeqNum = seq + msg.NSID = nsid logDebugf("sending message: %s", msg) diff --git a/pkg/security/ptracer/utils.go b/pkg/security/ptracer/utils.go new file mode 100644 index 00000000000000..65e3df73afe524 --- /dev/null +++ b/pkg/security/ptracer/utils.go @@ -0,0 +1,109 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +//go:build linux + +// Package ptracer holds the start command of CWS injector +package ptracer + +import ( + "bufio" + "bytes" + "math/rand" + "os" + "strconv" + "strings" + "syscall" + "unicode" + + "github.com/DataDog/datadog-agent/pkg/security/proto/ebpfless" +) + +// Funcs mainly copied from github.com/DataDog/datadog-agent/pkg/security/utils/cgroup.go +// in order to reduce the binary size of cws-instrumentation + +type controlGroup struct { + // id unique hierarchy ID + id int + + // controllers are the list of cgroup controllers bound to the hierarchy + controllers []string + + // path is the pathname of the control group to which the process + // belongs. It is relative to the mountpoint of the hierarchy. + path string +} + +func getProcControlGroupsFromFile(path string) ([]controlGroup, error) { + data, err := os.ReadFile(path) + if err != nil { + return nil, err + } + var cgroups []controlGroup + scanner := bufio.NewScanner(bytes.NewReader(data)) + for scanner.Scan() { + t := scanner.Text() + parts := strings.Split(t, ":") + var ID int + ID, err = strconv.Atoi(parts[0]) + if err != nil { + continue + } + c := controlGroup{ + id: ID, + controllers: strings.Split(parts[1], ","), + path: parts[2], + } + cgroups = append(cgroups, c) + } + return cgroups, nil + +} + +func getContainerID(path string) string { + // path is in form of: /docker/580f75c027f19bf3a59de881b056829f214fc1d78961c164e8669485ef5b0dd5 + // -> len(/docker/) = 8, +64 of container ID + if len(path) != 8+64 { // check length + return "" + } + for _, i := range path[8:] { // check that it's only numeric + if !unicode.IsLetter(i) && !unicode.IsDigit(i) { + return "" + } + } + return path[8:] +} + +func getCurrentProcContainerID() (string, error) { + cgroups, err := getProcControlGroupsFromFile("/proc/self/cgroup") + if err != nil { + return "", err + } + + for _, cgroup := range cgroups { + cid := getContainerID(cgroup.path) + if cid != "" { + return cid, nil + } + } + return "", nil +} + +func retrieveContainerIDFromProc(ctx *ebpfless.ContainerContext) error { + cgroup, err := getCurrentProcContainerID() + if err != nil { + return err + } + ctx.ID = cgroup + return nil +} + +func getNSID() uint64 { + var stat syscall.Stat_t + if err := syscall.Lstat("/proc/self/ns/pid", &stat); err != nil { + return rand.Uint64() + } + return stat.Ino +} diff --git a/pkg/security/resolvers/process/resolver_ebpfless.go b/pkg/security/resolvers/process/resolver_ebpfless.go index 6658731d27891e..fe0da8f9735638 100644 --- a/pkg/security/resolvers/process/resolver_ebpfless.go +++ b/pkg/security/resolvers/process/resolver_ebpfless.go @@ -26,10 +26,16 @@ import ( "github.com/DataDog/datadog-agent/pkg/security/secl/model" ) +// CacheResolverKey is used to store and retrieve processes from the cache +type CacheResolverKey struct { + Pid uint32 // Pid of the related process (namespaced) + NSID uint64 // NSID represents the pids namespace ID of the related container +} + // EBPFLessResolver defines a resolver type EBPFLessResolver struct { sync.RWMutex - entryCache map[uint32]*model.ProcessCacheEntry + entryCache map[CacheResolverKey]*model.ProcessCacheEntry opts ResolverOpts scrubber *procutil.DataScrubber statsdClient statsd.ClientInterface @@ -43,7 +49,7 @@ type EBPFLessResolver struct { // NewEBPFLessResolver returns a new process resolver func NewEBPFLessResolver(_ *config.Config, statsdClient statsd.ClientInterface, scrubber *procutil.DataScrubber, opts *ResolverOpts) (*EBPFLessResolver, error) { p := &EBPFLessResolver{ - entryCache: make(map[uint32]*model.ProcessCacheEntry), + entryCache: make(map[CacheResolverKey]*model.ProcessCacheEntry), opts: *opts, scrubber: scrubber, cacheSize: atomic.NewInt64(0), @@ -55,43 +61,43 @@ func NewEBPFLessResolver(_ *config.Config, statsdClient statsd.ClientInterface, return p, nil } -func (p *EBPFLessResolver) deleteEntry(pid uint32, exitTime time.Time) { - entry, ok := p.entryCache[pid] +func (p *EBPFLessResolver) deleteEntry(key CacheResolverKey, exitTime time.Time) { + entry, ok := p.entryCache[key] if !ok { return } entry.Exit(exitTime) - delete(p.entryCache, entry.Pid) + delete(p.entryCache, key) entry.Release() } // DeleteEntry tries to delete an entry in the process cache -func (p *EBPFLessResolver) DeleteEntry(pid uint32, exitTime time.Time) { +func (p *EBPFLessResolver) DeleteEntry(key CacheResolverKey, exitTime time.Time) { p.Lock() defer p.Unlock() - p.deleteEntry(pid, exitTime) + p.deleteEntry(key, exitTime) } // AddForkEntry adds an entry to the local cache and returns the newly created entry -func (p *EBPFLessResolver) AddForkEntry(pid uint32, ppid uint32) *model.ProcessCacheEntry { +func (p *EBPFLessResolver) AddForkEntry(key CacheResolverKey, ppid uint32) *model.ProcessCacheEntry { entry := p.processCacheEntryPool.Get() - entry.PIDContext.Pid = pid + entry.PIDContext.Pid = key.Pid entry.PPid = ppid p.Lock() defer p.Unlock() - p.insertForkEntry(entry) + p.insertForkEntry(key, entry) return entry } // AddExecEntry adds an entry to the local cache and returns the newly created entry -func (p *EBPFLessResolver) AddExecEntry(pid uint32, file string, argv []string, envs []string, ctrID string) *model.ProcessCacheEntry { +func (p *EBPFLessResolver) AddExecEntry(key CacheResolverKey, file string, argv []string, envs []string, ctrID string) *model.ProcessCacheEntry { entry := p.processCacheEntryPool.Get() - entry.PIDContext.Pid = pid + entry.PIDContext.Pid = key.Pid entry.Process.ArgsEntry = &model.ArgsEntry{Values: argv} if len(argv) > 0 { @@ -111,13 +117,13 @@ func (p *EBPFLessResolver) AddExecEntry(pid uint32, file string, argv []string, p.Lock() defer p.Unlock() - p.insertExecEntry(entry) + p.insertExecEntry(key, entry) return entry } -func (p *EBPFLessResolver) insertEntry(entry, prev *model.ProcessCacheEntry) { - p.entryCache[entry.Pid] = entry +func (p *EBPFLessResolver) insertEntry(key CacheResolverKey, entry, prev *model.ProcessCacheEntry) { + p.entryCache[key] = entry entry.Retain() if prev != nil { @@ -127,37 +133,40 @@ func (p *EBPFLessResolver) insertEntry(entry, prev *model.ProcessCacheEntry) { p.cacheSize.Inc() } -func (p *EBPFLessResolver) insertForkEntry(entry *model.ProcessCacheEntry) { - prev := p.entryCache[entry.Pid] +func (p *EBPFLessResolver) insertForkEntry(key CacheResolverKey, entry *model.ProcessCacheEntry) { + prev := p.entryCache[key] if prev != nil { // this shouldn't happen but it is better to exit the prev and let the new one replace it prev.Exit(entry.ForkTime) } if entry.Pid != 1 { - parent := p.entryCache[entry.PPid] + parent := p.entryCache[CacheResolverKey{ + Pid: entry.PPid, + NSID: key.NSID, + }] if parent != nil { parent.Fork(entry) } } - p.insertEntry(entry, prev) + p.insertEntry(key, entry, prev) } -func (p *EBPFLessResolver) insertExecEntry(entry *model.ProcessCacheEntry) { - prev := p.entryCache[entry.Pid] +func (p *EBPFLessResolver) insertExecEntry(key CacheResolverKey, entry *model.ProcessCacheEntry) { + prev := p.entryCache[key] if prev != nil { prev.Exec(entry) } - p.insertEntry(entry, prev) + p.insertEntry(key, entry, prev) } // Resolve returns the cache entry for the given pid -func (p *EBPFLessResolver) Resolve(pid uint32) *model.ProcessCacheEntry { +func (p *EBPFLessResolver) Resolve(key CacheResolverKey) *model.ProcessCacheEntry { p.Lock() defer p.Unlock() - if e, ok := p.entryCache[pid]; ok { + if e, ok := p.entryCache[key]; ok { return e } return nil diff --git a/pkg/security/secl/model/model_unix.go b/pkg/security/secl/model/model_unix.go index ef61e5a8c3b9cb..99964cec6e182e 100644 --- a/pkg/security/secl/model/model_unix.go +++ b/pkg/security/secl/model/model_unix.go @@ -195,6 +195,8 @@ type Event struct { NetDevice NetDeviceEvent `field:"-" json:"-"` VethPair VethPairEvent `field:"-" json:"-"` UnshareMountNS UnshareMountNSEvent `field:"-" json:"-"` + // used for ebpfless + NSID uint64 `field:"-" json:"-"` } // SetPathResolutionError sets the Event.pathResolutionError diff --git a/release.json b/release.json index c06943f398c617..4e9c0fd00c0d10 100644 --- a/release.json +++ b/release.json @@ -50,7 +50,7 @@ "OMNIBUS_RUBY_VERSION": "7.50.0-rc.1", "JMXFETCH_VERSION": "0.49.0", "JMXFETCH_HASH": "b5c2c3ff27603f469bb11961d559f1154887963e02b9d70d5f1fc7efa527a486", - "SECURITY_AGENT_POLICIES_VERSION": "v0.50.0", + "SECURITY_AGENT_POLICIES_VERSION": "v0.50.1", "MACOS_BUILD_VERSION": "6.50.0-rc.1", "WINDOWS_DDNPM_DRIVER": "release-signed", "WINDOWS_DDNPM_VERSION": "2.6.0", @@ -62,7 +62,7 @@ "OMNIBUS_RUBY_VERSION": "7.50.0-rc.1", "JMXFETCH_VERSION": "0.49.0", "JMXFETCH_HASH": "b5c2c3ff27603f469bb11961d559f1154887963e02b9d70d5f1fc7efa527a486", - "SECURITY_AGENT_POLICIES_VERSION": "v0.50.0", + "SECURITY_AGENT_POLICIES_VERSION": "v0.50.1", "MACOS_BUILD_VERSION": "7.50.0-rc.1", "WINDOWS_DDNPM_DRIVER": "release-signed", "WINDOWS_DDNPM_VERSION": "2.6.0", diff --git a/tasks/cws_instrumentation.py b/tasks/cws_instrumentation.py index f3da1139a47e09..c6004d6505e1e4 100644 --- a/tasks/cws_instrumentation.py +++ b/tasks/cws_instrumentation.py @@ -37,6 +37,7 @@ def build( arch=CURRENT_ARCH, # noqa: U100 go_mod="mod", static=False, + no_strip_binary=False, ): """ Build cws-instrumentation @@ -65,9 +66,11 @@ def build( go_build_tags = " ".join(build_tags) agent_bin = BIN_PATH + strip_flags = "" if no_strip_binary else "-s -w" + cmd = ( f'go build -mod={go_mod} {race_opt} {build_type} -tags "{go_build_tags}" ' - f'-o {agent_bin} -gcflags="{gcflags}" -ldflags="{ldflags} -s -w" {REPO_PATH}/cmd/cws-instrumentation' + f'-o {agent_bin} -gcflags="{gcflags}" -ldflags="{ldflags} {strip_flags}" {REPO_PATH}/cmd/cws-instrumentation' ) ctx.run(cmd, env=env) diff --git a/tasks/new_e2e_tests.py b/tasks/new_e2e_tests.py index c136430277a8f1..07389f58fded01 100644 --- a/tasks/new_e2e_tests.py +++ b/tasks/new_e2e_tests.py @@ -209,7 +209,7 @@ def _clean_stacks(ctx: Context): def _get_existing_stacks(ctx: Context) -> List[str]: e2e_stacks: List[str] = [] - output = ctx.run("PULUMI_SKIP_UPDATE_CHECK=true pulumi stack ls --all --project e2elocal --json", pty=True) + output = ctx.run("PULUMI_SKIP_UPDATE_CHECK=true pulumi stack ls --all --project e2elocal --json", hide=True) if output is None or not output: return [] stacks_data = json.loads(output.stdout) @@ -232,22 +232,23 @@ def _destroy_stack(ctx: Context, stack: str): f"PULUMI_SKIP_UPDATE_CHECK=true pulumi destroy --stack {stack} --yes --remove --skip-preview", pty=True, warn=True, + hide=True, ) if ret is not None and ret.exited != 0: # run with refresh on first destroy attempt failure ctx.run( f"PULUMI_SKIP_UPDATE_CHECK=true pulumi destroy --stack {stack} -r --yes --remove --skip-preview", - pty=True, warn=True, + hide=True, ) def _remove_stack(ctx: Context, stack: str): - ctx.run(f"PULUMI_SKIP_UPDATE_CHECK=true pulumi stack rm --force --yes --stack {stack}", pty=True) + ctx.run(f"PULUMI_SKIP_UPDATE_CHECK=true pulumi stack rm --force --yes --stack {stack}", hide=True) def _get_pulumi_about(ctx: Context) -> dict: - output = ctx.run("PULUMI_SKIP_UPDATE_CHECK=true pulumi about --json", pty=True, hide=True) + output = ctx.run("PULUMI_SKIP_UPDATE_CHECK=true pulumi about --json", hide=True) if output is None or not output: return "" return json.loads(output.stdout) diff --git a/tasks/test.py b/tasks/test.py index dd47144dd8ec6e..66fcbaab7f119d 100644 --- a/tasks/test.py +++ b/tasks/test.py @@ -1288,12 +1288,15 @@ def parse_test_log(log_file): with open(log_file, "r") as f: for line in f: json_line = json.loads(line) - if json_line["Action"] == "fail" and "Test" in json_line: + if ( + json_line["Action"] == "fail" + and "Test" in json_line + and f'{json_line["Package"]}/{json_line["Test"]}' not in failed_tests + ): n_test_executed += 1 failed_tests.append(f'{json_line["Package"]}/{json_line["Test"]}') if json_line["Action"] == "pass" and "Test" in json_line: n_test_executed += 1 if f'{json_line["Package"]}/{json_line["Test"]}' in failed_tests: failed_tests.remove(f'{json_line["Package"]}/{json_line["Test"]}') - return failed_tests, n_test_executed diff --git a/test/kitchen/site-cookbooks/dd-system-probe-check/recipes/linux.rb b/test/kitchen/site-cookbooks/dd-system-probe-check/recipes/linux.rb index 163f94d7c7454d..809d6e9a17c1ce 100644 --- a/test/kitchen/site-cookbooks/dd-system-probe-check/recipes/linux.rb +++ b/test/kitchen/site-cookbooks/dd-system-probe-check/recipes/linux.rb @@ -206,11 +206,14 @@ sensitive true end +execute 'docker login' do + command "cat /tmp/docker_password | docker login --username #{node[:docker][:username].to_s} --password-stdin #{node[:docker][:registry]}" + user "root" + live_stream true +end + execute 'pull docker images' do - command <<-EOF - cat /tmp/docker_password | docker login --username #{node[:docker][:username].to_s} --password-stdin #{node[:docker][:registry]} - xargs -L1 -a /tmp/docker-images.txt docker pull - EOF + command 'xargs -L1 -a /tmp/docker-images.txt docker pull' user "root" live_stream true end diff --git a/test/new-e2e/tests/agent-platform/platforms/platforms.json b/test/new-e2e/tests/agent-platform/platforms/platforms.json index 03bf546900a999..b3158631605b5f 100644 --- a/test/new-e2e/tests/agent-platform/platforms/platforms.json +++ b/test/new-e2e/tests/agent-platform/platforms/platforms.json @@ -51,7 +51,8 @@ "centos-610": "ami-0506f01ccb6dddeda", "centos-79": "ami-0aedf6b1cb669b4c7", "rhel-86": "ami-06640050dc3f556bb", - "rocky-92": "ami-062b16ca222175b97" + "rocky-92": "ami-062b16ca222175b97", + "rhel-86-fips": "ami-0d0fb96b595c56e03" }, "arm64": { "centos-79": "ami-0144a5a84f5699847", diff --git a/test/new-e2e/tests/npm/agentenv_npm_test.go b/test/new-e2e/tests/npm/agentenv_npm_test.go index c204a2fd0e0c5e..dff5e4dec16635 100644 --- a/test/new-e2e/tests/npm/agentenv_npm_test.go +++ b/test/new-e2e/tests/npm/agentenv_npm_test.go @@ -9,9 +9,10 @@ import ( "testing" "time" - "github.com/DataDog/datadog-agent/test/new-e2e/pkg/utils/e2e" "github.com/DataDog/test-infra-definitions/components/datadog/agentparams" "github.com/stretchr/testify/assert" + + "github.com/DataDog/datadog-agent/test/new-e2e/pkg/utils/e2e" ) type vmSuiteEx6 struct { @@ -19,6 +20,7 @@ type vmSuiteEx6 struct { } func TestVMSuiteEx6(t *testing.T) { + t.Skip("Skipping TestVMSuiteEx6 as it's flaky") e2e.Run(t, &vmSuiteEx6{}, e2e.FakeIntakeStackDef(e2e.WithAgentParams(agentparams.WithSystemProbeConfig(systemProbeConfigNPM)))) } diff --git a/test/new-e2e/tests/npm/ec2_1host_test.go b/test/new-e2e/tests/npm/ec2_1host_test.go index 993ea6a8cb8aac..2bcb7a63ccdd54 100644 --- a/test/new-e2e/tests/npm/ec2_1host_test.go +++ b/test/new-e2e/tests/npm/ec2_1host_test.go @@ -13,11 +13,12 @@ import ( agentmodel "github.com/DataDog/agent-payload/v5/process" - "github.com/DataDog/datadog-agent/test/new-e2e/pkg/utils/e2e" - "github.com/DataDog/datadog-agent/test/new-e2e/pkg/utils/e2e/params" "github.com/DataDog/test-infra-definitions/components/datadog/agentparams" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + + "github.com/DataDog/datadog-agent/test/new-e2e/pkg/utils/e2e" + "github.com/DataDog/datadog-agent/test/new-e2e/pkg/utils/e2e/params" ) type ec2VMSuite struct { @@ -27,6 +28,7 @@ type ec2VMSuite struct { // TestEC2VMSuite will validate running the agent on a single EC2 VM func TestEC2VMSuite(t *testing.T) { + t.Skip("Skipping TestEC2VMSuite as it's flaky") s := &ec2VMSuite{} e2eParams := []params.Option{} // debug helper