diff --git a/.github/workflows/collector-generate-and-update.yml b/.github/workflows/collector-generate-and-update.yml new file mode 100644 index 0000000000000..b0d3326cc44b2 --- /dev/null +++ b/.github/workflows/collector-generate-and-update.yml @@ -0,0 +1,73 @@ +name: Update OTel Collector Dependencies and Generate OTel Agent + +on: + schedule: + - cron: '0 12 * * 3' + workflow_dispatch: + +jobs: + update-and-generate: + runs-on: ubuntu-latest + permissions: + contents: write + pull-requests: write + steps: + - name: Set date + id: date + run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT + + - name: Checkout repository + uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 + + - name: Set up Python + uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 + with: + python-version: '3.12.6' + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + + - name: Run update task + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: inv -e collector.update + + - name: Run generate task + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: inv -e collector.generate + + - name: Check for changes + id: check_changes + run: | + git config --global user.name "github-actions[bot]" + git config --global user.email "github-actions[bot]@users.noreply.github.com" + git add . 
+ if git diff-index --quiet HEAD; then + echo "No changes detected" + echo "changes_detected=false" >> $GITHUB_OUTPUT + else + echo "Changes detected" + echo "changes_detected=true" >> $GITHUB_OUTPUT + fi + + - name: Commit changes + if: steps.check_changes.outputs.changes_detected == 'true' + run: | + git switch -c update-otel-collector-dependencies-${{ steps.date.outputs.date }} + git commit -m "Update OTel Collector dependencies and generate OTel Agent" + git push -u origin update-otel-collector-dependencies-${{ steps.date.outputs.date }} + + - name: Install GitHub CLI + if: steps.check_changes.outputs.changes_detected == 'true' + run: | + sudo apt-get update + sudo apt-get install -y gh + + - name: Create draft pull request + if: steps.check_changes.outputs.changes_detected == 'true' + run: | + gh auth login --with-token <<< "${{ secrets.GITHUB_TOKEN }}" + gh pr create --title "Update OTel collector dependencies" --body "This PR updates the OTel Collector dependencies to the latest version. Please ensure that all tests pass before marking ready for review." 
--base main --head update-otel-collector-dependencies-${{ steps.date.outputs.date }} --draft diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 2266b84ef3560..936abc4efddfb 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -173,49 +173,49 @@ variables: # To use images from datadog-agent-buildimages dev branches, set the corresponding # SUFFIX variable to _test_only DATADOG_AGENT_BUILDIMAGES_SUFFIX: "" - DATADOG_AGENT_BUILDIMAGES: v48372186-ff395e52 + DATADOG_AGENT_BUILDIMAGES: v48815877-9bfad02c DATADOG_AGENT_WINBUILDIMAGES_SUFFIX: "" - DATADOG_AGENT_WINBUILDIMAGES: v48372186-ff395e52 + DATADOG_AGENT_WINBUILDIMAGES: v48815877-9bfad02c DATADOG_AGENT_ARMBUILDIMAGES_SUFFIX: "" - DATADOG_AGENT_ARMBUILDIMAGES: v48372186-ff395e52 + DATADOG_AGENT_ARMBUILDIMAGES: v48815877-9bfad02c DATADOG_AGENT_SYSPROBE_BUILDIMAGES_SUFFIX: "" - DATADOG_AGENT_SYSPROBE_BUILDIMAGES: v48372186-ff395e52 + DATADOG_AGENT_SYSPROBE_BUILDIMAGES: v48815877-9bfad02c DATADOG_AGENT_BTF_GEN_BUILDIMAGES_SUFFIX: "" - DATADOG_AGENT_BTF_GEN_BUILDIMAGES: v48372186-ff395e52 + DATADOG_AGENT_BTF_GEN_BUILDIMAGES: v48815877-9bfad02c # New images to enable different version per image - not used yet - CI_IMAGE_BTF_GEN: v48372186-ff395e52 + CI_IMAGE_BTF_GEN: v48815877-9bfad02c CI_IMAGE_BTF_GEN_SUFFIX: "" - CI_IMAGE_DEB_X64: v48372186-ff395e52 + CI_IMAGE_DEB_X64: v48815877-9bfad02c CI_IMAGE_DEB_X64_SUFFIX: "" - CI_IMAGE_DEB_ARM64: v48372186-ff395e52 + CI_IMAGE_DEB_ARM64: v48815877-9bfad02c CI_IMAGE_DEB_ARM64_SUFFIX: "" - CI_IMAGE_DEB_ARMHF: v48372186-ff395e52 + CI_IMAGE_DEB_ARMHF: v48815877-9bfad02c CI_IMAGE_DEB_ARMHF_SUFFIX: "" - CI_IMAGE_DD_AGENT_TESTING: v48372186-ff395e52 + CI_IMAGE_DD_AGENT_TESTING: v48815877-9bfad02c CI_IMAGE_DD_AGENT_TESTING_SUFFIX: "" - CI_IMAGE_DOCKER_X64: v48372186-ff395e52 + CI_IMAGE_DOCKER_X64: v48815877-9bfad02c CI_IMAGE_DOCKER_X64_SUFFIX: "" - CI_IMAGE_DOCKER_ARM64: v48372186-ff395e52 + CI_IMAGE_DOCKER_ARM64: v48815877-9bfad02c CI_IMAGE_DOCKER_ARM64_SUFFIX: "" - 
CI_IMAGE_GITLAB_AGENT_DEPLOY: v48372186-ff395e52 + CI_IMAGE_GITLAB_AGENT_DEPLOY: v48815877-9bfad02c CI_IMAGE_GITLAB_AGENT_DEPLOY_SUFFIX: "" - CI_IMAGE_LINUX_GLIBC_2_17_X64: v48372186-ff395e52 + CI_IMAGE_LINUX_GLIBC_2_17_X64: v48815877-9bfad02c CI_IMAGE_LINUX_GLIBC_2_17_X64_SUFFIX: "" - CI_IMAGE_LINUX_GLIBC_2_23_ARM64: v48372186-ff395e52 + CI_IMAGE_LINUX_GLIBC_2_23_ARM64: v48815877-9bfad02c CI_IMAGE_LINUX_GLIBC_2_23_ARM64_SUFFIX: "" - CI_IMAGE_SYSTEM_PROBE_X64: v48372186-ff395e52 + CI_IMAGE_SYSTEM_PROBE_X64: v48815877-9bfad02c CI_IMAGE_SYSTEM_PROBE_X64_SUFFIX: "" - CI_IMAGE_SYSTEM_PROBE_ARM64: v48372186-ff395e52 + CI_IMAGE_SYSTEM_PROBE_ARM64: v48815877-9bfad02c CI_IMAGE_SYSTEM_PROBE_ARM64_SUFFIX: "" - CI_IMAGE_RPM_X64: v48372186-ff395e52 + CI_IMAGE_RPM_X64: v48815877-9bfad02c CI_IMAGE_RPM_X64_SUFFIX: "" - CI_IMAGE_RPM_ARM64: v48372186-ff395e52 + CI_IMAGE_RPM_ARM64: v48815877-9bfad02c CI_IMAGE_RPM_ARM64_SUFFIX: "" - CI_IMAGE_RPM_ARMHF: v48372186-ff395e52 + CI_IMAGE_RPM_ARMHF: v48815877-9bfad02c CI_IMAGE_RPM_ARMHF_SUFFIX: "" - CI_IMAGE_WIN_1809_X64: v48372186-ff395e52 + CI_IMAGE_WIN_1809_X64: v48815877-9bfad02c CI_IMAGE_WIN_1809_X64_SUFFIX: "" - CI_IMAGE_WIN_LTSC2022_X64: v48372186-ff395e52 + CI_IMAGE_WIN_LTSC2022_X64: v48815877-9bfad02c CI_IMAGE_WIN_LTSC2022_X64_SUFFIX: "" DATADOG_AGENT_EMBEDDED_PATH: /opt/datadog-agent/embedded diff --git a/.gitlab/common/test_infra_version.yml b/.gitlab/common/test_infra_version.yml index b2461c4666a84..dd6770058e7ec 100644 --- a/.gitlab/common/test_infra_version.yml +++ b/.gitlab/common/test_infra_version.yml @@ -4,4 +4,4 @@ variables: # and check the job creating the image to make sure you have the right SHA prefix TEST_INFRA_DEFINITIONS_BUILDIMAGES_SUFFIX: "" # Make sure to update test-infra-definitions version in go.mod as well - TEST_INFRA_DEFINITIONS_BUILDIMAGES: 7cd5e8a62570 + TEST_INFRA_DEFINITIONS_BUILDIMAGES: 047dd64128b6 diff --git a/.gitlab/container_build/docker_linux.yml b/.gitlab/container_build/docker_linux.yml 
index 5f5c83c0dce67..773f91fd1ca3a 100644 --- a/.gitlab/container_build/docker_linux.yml +++ b/.gitlab/container_build/docker_linux.yml @@ -89,6 +89,33 @@ docker_build_agent7_arm64: TAG_SUFFIX: -7 BUILD_ARG: --target test --build-arg DD_AGENT_ARTIFACT=datadog-agent-7*-arm64.tar.xz +# build agent7 fips image +docker_build_fips_agent7: + extends: [.docker_build_job_definition_amd64, .docker_build_artifact] + rules: + - !reference [.except_mergequeue] + - when: on_success + needs: + - job: datadog-agent-7-x64-fips + variables: + IMAGE: registry.ddbuild.io/ci/datadog-agent/agent + BUILD_CONTEXT: Dockerfiles/agent + TAG_SUFFIX: -7-fips + BUILD_ARG: --target test --build-arg DD_AGENT_ARTIFACT=datadog-fips-agent-7*-amd64.tar.xz + +docker_build_fips_agent7_arm64: + extends: [.docker_build_job_definition_arm64, .docker_build_artifact] + rules: + - !reference [.except_mergequeue] + - when: on_success + needs: + - job: datadog-agent-7-arm64-fips + variables: + IMAGE: registry.ddbuild.io/ci/datadog-agent/agent + BUILD_CONTEXT: Dockerfiles/agent + TAG_SUFFIX: -7-fips + BUILD_ARG: --target test --build-arg DD_AGENT_ARTIFACT=datadog-fips-agent-7*-arm64.tar.xz + # build agent7 jmx image docker_build_agent7_jmx: extends: [.docker_build_job_definition_amd64, .docker_build_artifact] @@ -116,6 +143,32 @@ docker_build_agent7_jmx_arm64: TAG_SUFFIX: -7-jmx BUILD_ARG: --target test --build-arg WITH_JMX=true --build-arg DD_AGENT_ARTIFACT=datadog-agent-7*-arm64.tar.xz +docker_build_fips_agent7_jmx: + extends: [.docker_build_job_definition_amd64, .docker_build_artifact] + rules: + - !reference [.except_mergequeue] + - when: on_success + needs: + - job: datadog-agent-7-x64-fips + variables: + IMAGE: registry.ddbuild.io/ci/datadog-agent/agent + BUILD_CONTEXT: Dockerfiles/agent + TAG_SUFFIX: -7-fips-jmx + BUILD_ARG: --target test --build-arg DD_AGENT_ARTIFACT=datadog-fips-agent-7*-amd64.tar.xz + +docker_build_fips_agent7_arm64_jmx: + extends: [.docker_build_job_definition_arm64, 
.docker_build_artifact] + rules: + - !reference [.except_mergequeue] + - when: on_success + needs: + - job: datadog-agent-7-arm64-fips + variables: + IMAGE: registry.ddbuild.io/ci/datadog-agent/agent + BUILD_CONTEXT: Dockerfiles/agent + TAG_SUFFIX: -7-fips-jmx + BUILD_ARG: --target test --build-arg DD_AGENT_ARTIFACT=datadog-fips-agent-7*-arm64.tar.xz + # build agent7 UA image docker_build_ot_agent7: extends: [.docker_build_job_definition_amd64, .docker_build_artifact] diff --git a/.gitlab/deploy_packages/nix.yml b/.gitlab/deploy_packages/nix.yml index 8f16b27a38b55..314fcb4961fe7 100644 --- a/.gitlab/deploy_packages/nix.yml +++ b/.gitlab/deploy_packages/nix.yml @@ -14,6 +14,18 @@ deploy_packages_deb-arm64-7: variables: PACKAGE_ARCH: arm64 +deploy_packages_deb-x64-7-fips: + extends: .deploy_packages_deb-7 + needs: [ agent_deb-x64-a7-fips ] + variables: + PACKAGE_ARCH: amd64 + +deploy_packages_deb-arm64-7-fips: + extends: .deploy_packages_deb-7 + needs: [ agent_deb-arm64-a7-fips ] + variables: + PACKAGE_ARCH: arm64 + deploy_packages_heroku_deb-x64-7: extends: .deploy_packages_deb-7 needs: [ agent_heroku_deb-x64-a7 ] @@ -62,6 +74,18 @@ deploy_packages_rpm-arm64-7: variables: PACKAGE_ARCH: aarch64 +deploy_packages_rpm-x64-7-fips: + extends: .deploy_packages_rpm-7 + needs: [ agent_rpm-x64-a7-fips ] + variables: + PACKAGE_ARCH: x86_64 + +deploy_packages_rpm-arm64-7-fips: + extends: .deploy_packages_rpm-7 + needs: [ agent_rpm-arm64-a7-fips ] + variables: + PACKAGE_ARCH: aarch64 + deploy_packages_iot_rpm-x64-7: extends: .deploy_packages_rpm-7 needs: [ iot_agent_rpm-x64 ] @@ -98,6 +122,18 @@ deploy_packages_suse_rpm-arm64-7: variables: PACKAGE_ARCH: aarch64 +deploy_packages_suse_rpm-x64-7-fips: + extends: .deploy_packages_suse_rpm-7 + needs: [ agent_suse-x64-a7-fips ] + variables: + PACKAGE_ARCH: x86_64 + +deploy_packages_suse_rpm-arm64-7-fips: + extends: .deploy_packages_suse_rpm-7 + needs: [ agent_suse-arm64-a7-fips ] + variables: + PACKAGE_ARCH: aarch64 + 
deploy_packages_iot_suse_rpm-x64-7: extends: .deploy_packages_suse_rpm-7 needs: [ iot_agent_suse-x64 ] diff --git a/.gitlab/dev_container_deploy/docker_linux.yml b/.gitlab/dev_container_deploy/docker_linux.yml index 57178929aff04..5b61ffc403a2b 100644 --- a/.gitlab/dev_container_deploy/docker_linux.yml +++ b/.gitlab/dev_container_deploy/docker_linux.yml @@ -32,6 +32,24 @@ dev_branch_multiarch-a7: - IMG_SOURCES: ${SRC_AGENT}:v${CI_PIPELINE_ID}-${CI_COMMIT_SHORT_SHA}-7-jmx-amd64,${SRC_AGENT}:v${CI_PIPELINE_ID}-${CI_COMMIT_SHORT_SHA}-7-jmx-arm64 IMG_DESTINATIONS: agent-dev:${CI_COMMIT_REF_SLUG}-py3-jmx +dev_branch_multiarch-fips: + extends: .docker_publish_job_definition + stage: dev_container_deploy + rules: !reference [.manual] + needs: + - docker_build_fips_agent7 + - docker_build_fips_agent7_arm64 + - docker_build_fips_agent7_jmx + - docker_build_fips_agent7_arm64_jmx + variables: + IMG_REGISTRIES: dev + parallel: + matrix: + - IMG_SOURCES: ${SRC_AGENT}:v${CI_PIPELINE_ID}-${CI_COMMIT_SHORT_SHA}-7-fips-amd64,${SRC_AGENT}:v${CI_PIPELINE_ID}-${CI_COMMIT_SHORT_SHA}-7-fips-arm64 + IMG_DESTINATIONS: agent-dev:${CI_COMMIT_REF_SLUG}-fips + - IMG_SOURCES: ${SRC_AGENT}:v${CI_PIPELINE_ID}-${CI_COMMIT_SHORT_SHA}-7-fips-jmx-amd64,${SRC_AGENT}:v${CI_PIPELINE_ID}-${CI_COMMIT_SHORT_SHA}-7-fips-jmx-arm64 + IMG_DESTINATIONS: agent-dev:${CI_COMMIT_REF_SLUG}-fips-jmx + dev_branch_multiarch-dogstatsd: extends: .docker_publish_job_definition stage: dev_container_deploy diff --git a/.gitlab/dev_container_deploy/e2e.yml b/.gitlab/dev_container_deploy/e2e.yml index 956f0ae8ef59c..f8c45e3fba8c2 100644 --- a/.gitlab/dev_container_deploy/e2e.yml +++ b/.gitlab/dev_container_deploy/e2e.yml @@ -17,6 +17,23 @@ qa_agent: IMG_SOURCES: 
${SRC_AGENT}:v${CI_PIPELINE_ID}-${CI_COMMIT_SHORT_SHA}-7-amd64,${SRC_AGENT}:v${CI_PIPELINE_ID}-${CI_COMMIT_SHORT_SHA}-7-arm64,${SRC_AGENT}:v${CI_PIPELINE_ID}-${CI_COMMIT_SHORT_SHA}-7-win1809-amd64,${SRC_AGENT}:v${CI_PIPELINE_ID}-${CI_COMMIT_SHORT_SHA}-7-winltsc2022-amd64 IMG_DESTINATIONS: agent:${CI_PIPELINE_ID}-${CI_COMMIT_SHORT_SHA} +qa_agent_jmx: + extends: .docker_publish_job_definition + stage: dev_container_deploy + rules: + - !reference [.except_mergequeue] + - !reference [.except_disable_e2e_tests] + - when: on_success + needs: + - docker_build_agent7_jmx + - docker_build_agent7_jmx_arm64 + - docker_build_agent7_windows1809_jmx + - docker_build_agent7_windows2022_jmx + variables: + IMG_REGISTRIES: agent-qa + IMG_SOURCES: ${SRC_AGENT}:v${CI_PIPELINE_ID}-${CI_COMMIT_SHORT_SHA}-7-jmx-amd64,${SRC_AGENT}:v${CI_PIPELINE_ID}-${CI_COMMIT_SHORT_SHA}-7-jmx-arm64,${SRC_AGENT}:v${CI_PIPELINE_ID}-${CI_COMMIT_SHORT_SHA}-7-jmx-win1809-amd64,${SRC_AGENT}:v${CI_PIPELINE_ID}-${CI_COMMIT_SHORT_SHA}-7-jmx-winltsc2022-amd64 + IMG_DESTINATIONS: agent:${CI_PIPELINE_ID}-${CI_COMMIT_SHORT_SHA}-jmx + qa_agent_ot: extends: .docker_publish_job_definition stage: dev_container_deploy diff --git a/.gitlab/e2e/e2e.yml b/.gitlab/e2e/e2e.yml index aa3c21938b4ce..8a6869cae5baa 100644 --- a/.gitlab/e2e/e2e.yml +++ b/.gitlab/e2e/e2e.yml @@ -79,6 +79,7 @@ needs: - !reference [.needs_new_e2e_template] - qa_agent + - qa_agent_jmx - qa_dca - qa_dogstatsd @@ -410,6 +411,20 @@ new-e2e-installer-windows: TARGETS: ./tests/installer/windows TEAM: fleet FLEET_INSTALL_METHOD: "windows" + parallel: + matrix: + # agent-package + - EXTRA_PARAMS: --run "TestAgentInstallsWithAgentUser$" + - EXTRA_PARAMS: --run "TestAgentInstalls$" + - EXTRA_PARAMS: --run "TestAgentUpgrades$" + # install-script + - EXTRA_PARAMS: --run "TestInstallScriptWithAgentUser$" + # installer-package + - EXTRA_PARAMS: --run "TestInstaller$" + - EXTRA_PARAMS: --run "TestInstallerRollback$" + - EXTRA_PARAMS: --run 
"TestInstallerSystemIntegrity$" + - EXTRA_PARAMS: --run "TestInstallerUpgrades$" + new-e2e-installer-ansible: extends: .new_e2e_template @@ -468,6 +483,10 @@ new-e2e-windows-systemprobe: TARGETS: ./tests/sysprobe-functional TEAM: windows-kernel-integrations SHOULD_RUN_IN_FLAKES_FINDER: "false" # Currently broken in flake finder ADXT-687 + parallel: + matrix: + - EXTRA_PARAMS: --run TestUSMAutoTaggingSuite + - EXTRA_PARAMS: --run TestVMSuite new-e2e-windows-security-agent: extends: .new_e2e_template diff --git a/.gitlab/internal_image_deploy/internal_image_deploy.yml b/.gitlab/internal_image_deploy/internal_image_deploy.yml index 82ffc58bc25f8..7c155413ced58 100644 --- a/.gitlab/internal_image_deploy/internal_image_deploy.yml +++ b/.gitlab/internal_image_deploy/internal_image_deploy.yml @@ -47,6 +47,51 @@ docker_trigger_internal: --variable TARGET_ENV --variable DYNAMIC_BUILD_RENDER_TARGET_FORWARD_PARAMETERS" +docker_trigger_internal-fips: + stage: internal_image_deploy + rules: !reference [.on_deploy_internal_or_manual] + needs: + - job: docker_build_fips_agent7 + artifacts: false + - job: docker_build_fips_agent7_arm64 + artifacts: false + image: registry.ddbuild.io/ci/datadog-agent-buildimages/deb_x64$DATADOG_AGENT_BUILDIMAGES_SUFFIX:$DATADOG_AGENT_BUILDIMAGES + tags: ["arch:amd64"] + variables: + DYNAMIC_BUILD_RENDER_RULES: agent-build-only # fake rule to not trigger the ones in the images repo + IMAGE_VERSION: tmpl-v11 + IMAGE_NAME: datadog-agent + RELEASE_TAG: ${CI_COMMIT_REF_SLUG}-fips + BUILD_TAG: ${CI_COMMIT_REF_SLUG}-fips + TMPL_SRC_IMAGE: v${CI_PIPELINE_ID}-${CI_COMMIT_SHORT_SHA}-7-fips + TMPL_SRC_REPO: ci/datadog-agent/agent + RELEASE_STAGING: "true" + script: + - GITLAB_TOKEN=$($CI_PROJECT_DIR/tools/ci/fetch_secret.sh $GITLAB_TOKEN write_api) || exit $?; export GITLAB_TOKEN + - if [ "$BUCKET_BRANCH" = "beta" ] || [ "$BUCKET_BRANCH" = "stable" ]; then TMPL_SRC_REPO="${TMPL_SRC_REPO}-release"; fi + - | + if [ "$BUCKET_BRANCH" = "nightly" ]; then + 
RELEASE_TAG="${RELEASE_TAG}-${CI_COMMIT_SHORT_SHA}" + TMPL_SRC_REPO="${TMPL_SRC_REPO}-nightly" + fi + - if [ "$BUCKET_BRANCH" = "dev" ]; then RELEASE_TAG="dev-${RELEASE_TAG}-${CI_COMMIT_SHORT_SHA}"; fi + - "inv pipeline.trigger-child-pipeline --project-name DataDog/images --git-ref master --timeout 3600 + --variable IMAGE_VERSION + --variable IMAGE_NAME + --variable RELEASE_TAG + --variable BUILD_TAG + --variable TMPL_SRC_IMAGE + --variable TMPL_SRC_REPO + --variable RELEASE_STAGING + --variable RELEASE_PROD + --variable DYNAMIC_BUILD_RENDER_RULES + --variable APPS + --variable BAZEL_TARGET + --variable DDR + --variable DDR_WORKFLOW_ID + --variable TARGET_ENV + --variable DYNAMIC_BUILD_RENDER_TARGET_FORWARD_PARAMETERS" + docker_trigger_internal-ot: stage: internal_image_deploy rules: !reference [.on_deploy_internal_or_manual] diff --git a/.gitlab/package_build/linux.yml b/.gitlab/package_build/linux.yml index d129676f3bc3c..f8d785d4ef62d 100644 --- a/.gitlab/package_build/linux.yml +++ b/.gitlab/package_build/linux.yml @@ -67,6 +67,12 @@ before_script: - export RELEASE_VERSION=$RELEASE_VERSION_7 +.agent_fips_build: + variables: + FLAVOR: fips + before_script: + - export RELEASE_VERSION=$RELEASE_VERSION_7 + # build Agent 7 binaries for x86_64 datadog-agent-7-x64: extends: [.agent_build_common, .agent_build_x86, .agent_7_build] @@ -83,6 +89,14 @@ datadog-ot-agent-7-x64: datadog-ot-agent-7-arm64: extends: [.agent_build_common, .agent_build_arm64, .ot_agent_7_build] +# build Agent 7 binaries for x86_64 with FIPS +datadog-agent-7-x64-fips: + extends: [.agent_build_common, .agent_build_x86, .agent_fips_build] + +# build Agent 7 binaries for arm64 with FIPS +datadog-agent-7-arm64-fips: + extends: [.agent_build_common, .agent_build_arm64, .agent_fips_build] + .iot-agent-common: extends: .agent_build_common needs: ["go_mod_tidy_check", "go_deps"] diff --git a/.gitlab/packaging/deb.yml b/.gitlab/packaging/deb.yml index a133b70d749a1..277897c6060f6 100644 --- 
a/.gitlab/packaging/deb.yml +++ b/.gitlab/packaging/deb.yml @@ -56,6 +56,26 @@ agent_deb-arm64-a7: variables: DD_PROJECT: "agent" +agent_deb-x64-a7-fips: + extends: [.package_deb_common, .package_deb_x86, .package_deb_agent_7] + rules: + - !reference [.except_mergequeue] + - when: on_success + needs: ["datadog-agent-7-x64-fips"] + variables: + OMNIBUS_EXTRA_ARGS: "--flavor fips" + DD_PROJECT: "agent" + +agent_deb-arm64-a7-fips: + extends: [.package_deb_common, .package_deb_arm64, .package_deb_agent_7] + rules: + - !reference [.except_mergequeue] + - when: on_success + needs: ["datadog-agent-7-arm64-fips"] + variables: + OMNIBUS_EXTRA_ARGS: "--flavor fips" + DD_PROJECT: "agent" + .package_ot_deb_common: extends: [.package_deb_common] script: diff --git a/.gitlab/packaging/rpm.yml b/.gitlab/packaging/rpm.yml index 9444f7062ad89..883f3910cc086 100644 --- a/.gitlab/packaging/rpm.yml +++ b/.gitlab/packaging/rpm.yml @@ -92,6 +92,35 @@ installer_rpm-amd64: # explicitly disable the check PACKAGE_REQUIRED_FILES_LIST: "" +agent_rpm-x64-a7-fips: + extends: [.package_rpm_common, .package_rpm_agent_7, .package_rpm_x86] + tags: ["arch:amd64"] + needs: ["datadog-agent-7-x64-fips"] + variables: + OMNIBUS_EXTRA_ARGS: "--flavor fips" + DD_PROJECT: agent + +agent_rpm-arm64-a7-fips: + extends: [.package_rpm_common, .package_rpm_agent_7, .package_rpm_arm64] + needs: ["datadog-agent-7-arm64-fips"] + variables: + OMNIBUS_EXTRA_ARGS: "--flavor fips" + DD_PROJECT: agent + +agent_suse-x64-a7-fips: + extends: [.package_suse_rpm_common, .package_rpm_agent_7, .package_rpm_x86] + needs: ["datadog-agent-7-x64-fips"] + variables: + OMNIBUS_EXTRA_ARGS: "--flavor fips" + DD_PROJECT: agent + +agent_suse-arm64-a7-fips: + extends: [.package_suse_rpm_common, .package_rpm_agent_7, .package_rpm_arm64] + needs: ["datadog-agent-7-arm64-fips"] + variables: + OMNIBUS_EXTRA_ARGS: "--flavor fips" + DD_PROJECT: agent + installer_rpm-arm64: extends: [.package_rpm_common, .package_rpm_agent_7, .package_rpm_arm64] 
needs: ["installer-arm64"] diff --git a/Dockerfiles/agent/Dockerfile b/Dockerfiles/agent/Dockerfile index bf99bef8e05a1..6c8cb42eedccf 100644 --- a/Dockerfiles/agent/Dockerfile +++ b/Dockerfiles/agent/Dockerfile @@ -180,6 +180,13 @@ RUN tar xzf s6.tgz -C / --exclude="./bin" \ # * https://datadoghq.atlassian.net/wiki/spaces/TS/pages/2615709591/Why+the+containerized+Agent+runs+as+root#Agent-user RUN [ "$(getent passwd dd-agent | cut -d: -f 3)" -eq 100 ] +# Enable FIPS if needed +RUN if [ -x /opt/datadog-agent/embedded/bin/fipsinstall.sh ]; then \ + /opt/datadog-agent/embedded/bin/fipsinstall.sh; \ +fi +# This is used by MSGO to enable FIPS mode so it won't affect the non-FIPS image +ENV GOFIPS=1 + # Override the exit script by ours to fix --pid=host operations RUN mv /etc/s6/init/init-stage3 /etc/s6/init/init-stage3-original COPY init-stage3 /etc/s6/init/init-stage3 diff --git a/LICENSE-3rdparty.csv b/LICENSE-3rdparty.csv index ba2ac82c2ee84..32bfe097dee1d 100644 --- a/LICENSE-3rdparty.csv +++ b/LICENSE-3rdparty.csv @@ -2491,6 +2491,7 @@ core,go.opentelemetry.io/collector/semconv/v1.16.0,Apache-2.0,Copyright The Open core,go.opentelemetry.io/collector/semconv/v1.17.0,Apache-2.0,Copyright The OpenTelemetry Authors core,go.opentelemetry.io/collector/semconv/v1.18.0,Apache-2.0,Copyright The OpenTelemetry Authors core,go.opentelemetry.io/collector/semconv/v1.21.0,Apache-2.0,Copyright The OpenTelemetry Authors +core,go.opentelemetry.io/collector/semconv/v1.22.0,Apache-2.0,Copyright The OpenTelemetry Authors core,go.opentelemetry.io/collector/semconv/v1.25.0,Apache-2.0,Copyright The OpenTelemetry Authors core,go.opentelemetry.io/collector/semconv/v1.26.0,Apache-2.0,Copyright The OpenTelemetry Authors core,go.opentelemetry.io/collector/semconv/v1.27.0,Apache-2.0,Copyright The OpenTelemetry Authors diff --git a/README.md b/README.md index ab137bbe48339..8e93e1988c48e 100644 --- a/README.md +++ b/README.md @@ -28,8 +28,7 @@ To build the Agent you need: **Note:** you may want 
to use a python virtual environment to avoid polluting your system-wide python environment with the agent build/dev dependencies. You can create a virtual environment using `virtualenv` and then use the `invoke agent.build` - parameters `--python-home-2=` and/or `--python-home-3=` - (depending on the python versions you are using) to use the virtual environment's + parameters `--python-home-3=` to use the virtual environment's interpreter and libraries. By default, this environment is only used for dev dependencies listed in `requirements.txt`. @@ -55,7 +54,6 @@ To start working on the Agent, you can build the `main` branch: virtualenvs): invoke agent.build \ - --python-home-2=$GOPATH/src/github.com/DataDog/datadog-agent/venv2 \ --python-home-3=$GOPATH/src/github.com/DataDog/datadog-agent/venv3 Running `invoke agent.build`: diff --git a/cmd/cluster-agent-cloudfoundry/subcommands/run/command.go b/cmd/cluster-agent-cloudfoundry/subcommands/run/command.go index 42e40b88a7221..b16485b36f9e4 100644 --- a/cmd/cluster-agent-cloudfoundry/subcommands/run/command.go +++ b/cmd/cluster-agent-cloudfoundry/subcommands/run/command.go @@ -195,7 +195,7 @@ func run( } var clusterCheckHandler *clusterchecks.Handler - clusterCheckHandler, err = setupClusterCheck(mainCtx, ac) + clusterCheckHandler, err = setupClusterCheck(mainCtx, ac, taggerComp) if err == nil { api.ModifyAPIRouter(func(r *mux.Router) { dcav1.InstallChecksEndpoints(r, clusteragent.ServerContext{ClusterCheckHandler: clusterCheckHandler}) @@ -302,8 +302,8 @@ func initializeBBSCache(ctx context.Context) error { } } -func setupClusterCheck(ctx context.Context, ac autodiscovery.Component) (*clusterchecks.Handler, error) { - handler, err := clusterchecks.NewHandler(ac) +func setupClusterCheck(ctx context.Context, ac autodiscovery.Component, tagger tagger.Component) (*clusterchecks.Handler, error) { + handler, err := clusterchecks.NewHandler(ac, tagger) if err != nil { return nil, err } diff --git 
a/cmd/cluster-agent/subcommands/start/command.go b/cmd/cluster-agent/subcommands/start/command.go index ed0030f0183cd..f85419780b657 100644 --- a/cmd/cluster-agent/subcommands/start/command.go +++ b/cmd/cluster-agent/subcommands/start/command.go @@ -389,7 +389,7 @@ func start(log log.Component, if config.GetBool("cluster_checks.enabled") { // Start the cluster check Autodiscovery - clusterCheckHandler, err := setupClusterCheck(mainCtx, ac) + clusterCheckHandler, err := setupClusterCheck(mainCtx, ac, taggerComp) if err == nil { api.ModifyAPIRouter(func(r *mux.Router) { dcav1.InstallChecksEndpoints(r, clusteragent.ServerContext{ClusterCheckHandler: clusterCheckHandler}) @@ -551,8 +551,8 @@ func start(log log.Component, return nil } -func setupClusterCheck(ctx context.Context, ac autodiscovery.Component) (*clusterchecks.Handler, error) { - handler, err := clusterchecks.NewHandler(ac) +func setupClusterCheck(ctx context.Context, ac autodiscovery.Component, tagger tagger.Component) (*clusterchecks.Handler, error) { + handler, err := clusterchecks.NewHandler(ac, tagger) if err != nil { return nil, err } diff --git a/cmd/serverless/main.go b/cmd/serverless/main.go index 4ac960ed6a32e..77f46c01fded4 100644 --- a/cmd/serverless/main.go +++ b/cmd/serverless/main.go @@ -345,7 +345,7 @@ func startOtlpAgent(wg *sync.WaitGroup, metricAgent *metrics.ServerlessMetricAge func startTraceAgent(wg *sync.WaitGroup, lambdaSpanChan chan *pb.Span, coldStartSpanId uint64, serverlessDaemon *daemon.Daemon, tagger tagger.Component, rcService *remoteconfig.CoreAgentService) { defer wg.Done() traceAgent := trace.StartServerlessTraceAgent(trace.StartServerlessTraceAgentArgs{ - Enabled: pkgconfigsetup.Datadog().GetBool("apm_config.enabled"), + Enabled: configUtils.IsAPMEnabled(pkgconfigsetup.Datadog()), LoadConfig: &trace.LoadConfig{Path: datadogConfigPath, Tagger: tagger}, LambdaSpanChan: lambdaSpanChan, ColdStartSpanID: coldStartSpanId, diff --git a/cmd/system-probe/config/adjust_usm.go 
b/cmd/system-probe/config/adjust_usm.go index 4241fa6ccbab9..b3c1bcf93f415 100644 --- a/cmd/system-probe/config/adjust_usm.go +++ b/cmd/system-probe/config/adjust_usm.go @@ -30,6 +30,7 @@ func adjustUSM(cfg model.Config) { deprecateBool(cfg, netNS("enable_http_monitoring"), smNS("enable_http_monitoring")) deprecateBool(cfg, netNS("enable_https_monitoring"), smNS("tls", "native", "enabled")) deprecateBool(cfg, smNS("enable_go_tls_support"), smNS("tls", "go", "enabled")) + applyDefault(cfg, smNS("tls", "go", "enabled"), true) deprecateGeneric(cfg, netNS("http_replace_rules"), smNS("http_replace_rules")) deprecateInt64(cfg, netNS("max_tracked_http_connections"), smNS("max_tracked_http_connections")) applyDefault(cfg, smNS("max_tracked_http_connections"), 1024) diff --git a/cmd/system-probe/modules/eventmonitor.go b/cmd/system-probe/modules/eventmonitor.go index 1f5e0e874667a..f74b13bec36af 100644 --- a/cmd/system-probe/modules/eventmonitor.go +++ b/cmd/system-probe/modules/eventmonitor.go @@ -36,6 +36,7 @@ func createEventMonitorModule(_ *sysconfigtypes.Config, deps module.FactoryDepen opts := eventmonitor.Opts{} opts.ProbeOpts.EnvsVarResolutionEnabled = emconfig.EnvVarsResolutionEnabled + opts.ProbeOpts.Tagger = deps.Tagger secmoduleOpts := secmodule.Opts{} // adapt options @@ -45,7 +46,7 @@ func createEventMonitorModule(_ *sysconfigtypes.Config, deps module.FactoryDepen secmodule.DisableRuntimeSecurity(secconfig) } - evm, err := eventmonitor.NewEventMonitor(emconfig, secconfig, opts, deps.Telemetry) + evm, err := eventmonitor.NewEventMonitor(emconfig, secconfig, opts) if err != nil { log.Errorf("error initializing event monitoring module: %v", err) return nil, module.ErrNotEnabled diff --git a/comp/collector/collector/collectorimpl/agent_check_metadata_test.go b/comp/collector/collector/collectorimpl/agent_check_metadata_test.go index 114dd0b8f93df..f997bd0332518 100644 --- a/comp/collector/collector/collectorimpl/agent_check_metadata_test.go +++ 
b/comp/collector/collector/collectorimpl/agent_check_metadata_test.go @@ -16,6 +16,7 @@ import ( "github.com/DataDog/datadog-agent/comp/aggregator/demultiplexer/demultiplexerimpl" "github.com/DataDog/datadog-agent/comp/core" "github.com/DataDog/datadog-agent/comp/core/config" + haagentmock "github.com/DataDog/datadog-agent/comp/haagent/mock" "github.com/DataDog/datadog-agent/pkg/collector/externalhost" "github.com/DataDog/datadog-agent/pkg/serializer" "github.com/DataDog/datadog-agent/pkg/util/fxutil" @@ -36,6 +37,7 @@ func TestExternalHostTags(t *testing.T) { c := newCollector(fxutil.Test[dependencies](t, core.MockBundle(), demultiplexerimpl.MockModule(), + haagentmock.Module(), fx.Provide(func() optional.Option[serializer.MetricSerializer] { return optional.NewNoneOption[serializer.MetricSerializer]() }), diff --git a/comp/collector/collector/collectorimpl/collector.go b/comp/collector/collector/collectorimpl/collector.go index f90b0c61cec88..94b10a962d3ed 100644 --- a/comp/collector/collector/collectorimpl/collector.go +++ b/comp/collector/collector/collectorimpl/collector.go @@ -24,6 +24,7 @@ import ( flaretypes "github.com/DataDog/datadog-agent/comp/core/flare/types" log "github.com/DataDog/datadog-agent/comp/core/log/def" "github.com/DataDog/datadog-agent/comp/core/status" + haagent "github.com/DataDog/datadog-agent/comp/haagent/def" metadata "github.com/DataDog/datadog-agent/comp/metadata/runner/runnerimpl" "github.com/DataDog/datadog-agent/pkg/aggregator/sender" pkgCollector "github.com/DataDog/datadog-agent/pkg/collector" @@ -48,17 +49,19 @@ const ( type dependencies struct { fx.In - Lc fx.Lifecycle - Config config.Component - Log log.Component + Lc fx.Lifecycle + Config config.Component + Log log.Component + HaAgent haagent.Component SenderManager sender.SenderManager MetricSerializer optional.Option[serializer.MetricSerializer] } type collectorImpl struct { - log log.Component - config config.Component + log log.Component + config config.Component + 
haAgent haagent.Component senderManager sender.SenderManager metricSerializer optional.Option[serializer.MetricSerializer] @@ -119,6 +122,7 @@ func newCollector(deps dependencies) *collectorImpl { c := &collectorImpl{ log: deps.Log, config: deps.Config, + haAgent: deps.HaAgent, senderManager: deps.SenderManager, metricSerializer: deps.MetricSerializer, checks: make(map[checkid.ID]*middleware.CheckWrapper), @@ -186,7 +190,7 @@ func (c *collectorImpl) start(_ context.Context) error { c.m.Lock() defer c.m.Unlock() - run := runner.NewRunner(c.senderManager) + run := runner.NewRunner(c.senderManager, c.haAgent) sched := scheduler.NewScheduler(run.GetChan()) // let the runner some visibility into the scheduler diff --git a/comp/collector/collector/collectorimpl/collector_demux_test.go b/comp/collector/collector/collectorimpl/collector_demux_test.go index b2daf0bc28fd9..ed91be6ef1ade 100644 --- a/comp/collector/collector/collectorimpl/collector_demux_test.go +++ b/comp/collector/collector/collectorimpl/collector_demux_test.go @@ -21,6 +21,7 @@ import ( "github.com/DataDog/datadog-agent/comp/core/config" log "github.com/DataDog/datadog-agent/comp/core/log/def" logmock "github.com/DataDog/datadog-agent/comp/core/log/mock" + haagentmock "github.com/DataDog/datadog-agent/comp/haagent/mock" compressionmock "github.com/DataDog/datadog-agent/comp/serializer/compression/fx-mock" checkid "github.com/DataDog/datadog-agent/pkg/collector/check/id" @@ -85,6 +86,7 @@ func (suite *CollectorDemuxTestSuite) SetupTest() { suite.SenderManagerMock = NewSenderManagerMock(suite.demux) suite.c = newCollector(fxutil.Test[dependencies](suite.T(), core.MockBundle(), + haagentmock.Module(), fx.Provide(func() sender.SenderManager { return suite.SenderManagerMock }), diff --git a/comp/collector/collector/collectorimpl/collector_test.go b/comp/collector/collector/collectorimpl/collector_test.go index 1aea4301bc791..44ddc7e2f357c 100644 --- a/comp/collector/collector/collectorimpl/collector_test.go +++ 
b/comp/collector/collector/collectorimpl/collector_test.go @@ -22,6 +22,7 @@ import ( "github.com/DataDog/datadog-agent/comp/collector/collector/collectorimpl/internal/middleware" "github.com/DataDog/datadog-agent/comp/core" "github.com/DataDog/datadog-agent/comp/core/config" + haagentmock "github.com/DataDog/datadog-agent/comp/haagent/mock" "github.com/DataDog/datadog-agent/pkg/aggregator" "github.com/DataDog/datadog-agent/pkg/collector/check" checkid "github.com/DataDog/datadog-agent/pkg/collector/check/id" @@ -97,6 +98,7 @@ func (suite *CollectorTestSuite) SetupTest() { suite.c = newCollector(fxutil.Test[dependencies](suite.T(), core.MockBundle(), demultiplexerimpl.MockModule(), + haagentmock.Module(), fx.Provide(func() optional.Option[serializer.MetricSerializer] { return optional.NewNoneOption[serializer.MetricSerializer]() }), diff --git a/comp/core/agenttelemetry/impl/agenttelemetry.go b/comp/core/agenttelemetry/impl/agenttelemetry.go index de05da82a5b79..2b54f2b1eb5c7 100644 --- a/comp/core/agenttelemetry/impl/agenttelemetry.go +++ b/comp/core/agenttelemetry/impl/agenttelemetry.go @@ -7,12 +7,15 @@ package agenttelemetryimpl import ( + "bytes" "context" "encoding/json" "errors" "fmt" "net/http" "strconv" + "strings" + "sync" "golang.org/x/exp/maps" @@ -40,6 +43,10 @@ type atel struct { cancelCtx context.Context cancel context.CancelFunc + + prevPromMetricCounterValues map[string]float64 + prevPromMetricHistogramValues map[string]uint64 + prevPromMetricValuesMU sync.Mutex } // Requires defines the dependencies for the agenttelemetry component @@ -128,6 +135,9 @@ func createAtel( sender: sender, runner: runner, atelCfg: atelCfg, + + prevPromMetricCounterValues: make(map[string]float64), + prevPromMetricHistogramValues: make(map[string]uint64), } } @@ -189,8 +199,8 @@ func (a *atel) aggregateMetricTags(mCfg *MetricConfig, mt dto.MetricType, ms []* tagsKey := "" // if tags are defined, we need to create a key from them by dropping not specified - // in 
configuration tags. The key is constructed by conatenating specified tag names and values - // if the a timeseries has tags is not specified in + // in configuration tags. The key is constructed by concatenating specified tag names + // and values if a timeseries has tags is not specified origTags := m.GetLabel() if len(origTags) > 0 { // sort tags (to have a consistent key for the same tag set) @@ -245,6 +255,102 @@ func (a *atel) aggregateMetricTags(mCfg *MetricConfig, mt dto.MetricType, ms []* return maps.Values(amMap) } +func buildKeysForMetricsPreviousValues(mt dto.MetricType, metricName string, metrics []*dto.Metric) []string { + keyNames := make([]string, 0, len(metrics)) + for _, m := range metrics { + var keyName string + tags := m.GetLabel() + if len(tags) == 0 { + // start with the metric name + keyName = metricName + } else { + // Sort tags to stability of the key + sortedTags := cloneLabelsSorted(tags) + var builder strings.Builder + + // start with the metric name plus the tags + builder.WriteString(metricName) + for _, tag := range sortedTags { + builder.WriteString(makeLabelPairKey(tag)) + } + keyName = builder.String() + } + + if mt == dto.MetricType_HISTOGRAM { + // add bucket names to the key + for _, bucket := range m.Histogram.GetBucket() { + keyNames = append(keyNames, fmt.Sprintf("%v:%v", keyName, bucket.GetUpperBound())) + } + } else { + keyNames = append(keyNames, keyName) + } + } + + return keyNames +} + +func convertPromHistogramsToDatadogHistogramsValues(metrics []*dto.Metric, prevPromMetricValues map[string]uint64, keyNames []string) { + if len(metrics) > 0 { + bucketCount := len(metrics[0].Histogram.GetBucket()) + for i, m := range metrics { + // First, deduct the previous cumulative count from the current one + for j, b := range m.Histogram.GetBucket() { + key := keyNames[(i*bucketCount)+j] + curValue := b.GetCumulativeCount() + + // Adjust the counter value if found + if prevValue, ok := prevPromMetricValues[key]; ok { + 
*b.CumulativeCount -= prevValue + } + + // Upsert the cache of previous counter values + prevPromMetricValues[key] = curValue + } + + // Then, de-cumulate next bucket value from the previous bucket values + var prevValue uint64 + for _, b := range m.Histogram.GetBucket() { + curValue := b.GetCumulativeCount() + *b.CumulativeCount -= prevValue + prevValue = curValue + } + } + } +} + +func convertPromCountersToDatadogCountersValues(metrics []*dto.Metric, prevPromMetricValues map[string]float64, keyNames []string) { + for i, m := range metrics { + key := keyNames[i] + curValue := m.GetCounter().GetValue() + + // Adjust the counter value if found + if prevValue, ok := prevPromMetricValues[key]; ok { + *m.GetCounter().Value -= prevValue + } + + // Upsert the cache of previous counter values + prevPromMetricValues[key] = curValue + } +} + +// Convert ... +// 1. Prom Counters from monotonic to non-monotonic by resetting the counter during this call +// 2. Prom Histograms buckets counters from monotonic to non-monotonic by resetting the counter during this call +func (a *atel) convertPromMetricToDatadogMetricsValues(mt dto.MetricType, metricName string, metrics []*dto.Metric) { + if len(metrics) > 0 && (mt == dto.MetricType_COUNTER || mt == dto.MetricType_HISTOGRAM) { + // Build the keys for the metrics (or buckets) to cache their previous values + keyNames := buildKeysForMetricsPreviousValues(mt, metricName, metrics) + + a.prevPromMetricValuesMU.Lock() + defer a.prevPromMetricValuesMU.Unlock() + if mt == dto.MetricType_HISTOGRAM { + convertPromHistogramsToDatadogHistogramsValues(metrics, a.prevPromMetricHistogramValues, keyNames) + } else { + convertPromCountersToDatadogCountersValues(metrics, a.prevPromMetricCounterValues, keyNames) + } + } +} + func isMetricFiltered(p *Profile, mCfg *MetricConfig, mt dto.MetricType, m *dto.Metric) bool { // filter out zero values if specified in the profile if p.excludeZeroMetric && isZeroValueMetric(mt, m) { @@ -288,13 +394,17 @@ func 
(a *atel) transformMetricFamily(p *Profile, mfam *dto.MetricFamily) *agentm } } - amt := a.aggregateMetricTags(mCfg, mt, fm) - // nothing to report if len(fm) == 0 { return nil } + // Aggregate the metric tags + amt := a.aggregateMetricTags(mCfg, mt, fm) + + // Convert Prom Metrics values to the corresponding Datadog metrics style values + a.convertPromMetricToDatadogMetricsValues(mt, mCfg.Name, amt) + return &agentmetric{ name: mCfg.Name, metrics: amt, @@ -392,12 +502,18 @@ func (a *atel) GetAsJSON() ([]byte, error) { return nil, fmt.Errorf("unable to marshal agent telemetry payload: %w", err) } - jsonPayloadScrubbed, err := scrubber.ScrubJSONString(string(jsonPayload)) + jsonPayloadScrubbed, err := scrubber.ScrubJSON(jsonPayload) if err != nil { return nil, fmt.Errorf("unable to scrub agent telemetry payload: %w", err) } - return []byte(jsonPayloadScrubbed), nil + var prettyPayload bytes.Buffer + err = json.Indent(&prettyPayload, jsonPayloadScrubbed, "", "\t") + if err != nil { + return nil, fmt.Errorf("unable to pretified agent telemetry payload: %w", err) + } + + return prettyPayload.Bytes(), nil } // start is called by FX when the application starts. 
diff --git a/comp/core/agenttelemetry/impl/agenttelemetry_test.go b/comp/core/agenttelemetry/impl/agenttelemetry_test.go index 7cd659a2a45f1..f0524cae80129 100644 --- a/comp/core/agenttelemetry/impl/agenttelemetry_test.go +++ b/comp/core/agenttelemetry/impl/agenttelemetry_test.go @@ -265,6 +265,35 @@ func (p *Payload) UnmarshalJSON(b []byte) (err error) { return fmt.Errorf("request_type should be either agent-metrics or message-batch") } +func getPayload(a *atel) (*Payload, error) { + payloadJSON, err := a.GetAsJSON() + if err != nil { + return nil, err + } + + var payload Payload + err = json.Unmarshal(payloadJSON, &payload) + return &payload, err +} + +func getPayloadMetric(a *atel, metricName string) (*MetricPayload, bool) { + payload, err := getPayload(a) + if err != nil { + return nil, false + } + metrics := payload.Payload.(AgentMetricsPayload).Metrics + if metricItf, ok := metrics[metricName]; ok { + metric := metricItf.(MetricPayload) + return &metric, true + } + + return nil, false +} + +// Validate the payload + +// metric, ok := metrics["foo.bar"] + // ------------------------------ // Tests @@ -613,10 +642,7 @@ func TestTwoProfilesOnTheSameScheduleGenerateSinglePayload(t *testing.T) { require.True(t, a.enabled) // Get payload - payloadJSON, err := a.GetAsJSON() - assert.NoError(t, err) - var payload Payload - err = json.Unmarshal(payloadJSON, &payload) + payload, err := getPayload(a) require.NoError(t, err) // ----------------------- @@ -653,7 +679,7 @@ func TestOneProfileWithOneMetricMultipleContextsGenerateTwoPayloads(t *testing.T require.True(t, a.enabled) payloadJSON, err := a.GetAsJSON() - assert.NoError(t, err) + require.NoError(t, err) var payload map[string]interface{} err = json.Unmarshal(payloadJSON, &payload) require.NoError(t, err) @@ -729,10 +755,7 @@ func TestOneProfileWithTwoMetricGenerateSinglePayloads(t *testing.T) { require.True(t, a.enabled) // Get payload - payloadJSON, err := a.GetAsJSON() - assert.NoError(t, err) - var payload 
Payload - err = json.Unmarshal(payloadJSON, &payload) + payload, err := getPayload(a) require.NoError(t, err) // ----------------------- @@ -915,10 +938,7 @@ func TestGetAsJSONScrub(t *testing.T) { require.True(t, a.enabled) // Get payload - payloadJSON, err := a.GetAsJSON() - assert.NoError(t, err) - var payload Payload - err = json.Unmarshal(payloadJSON, &payload) + payload, err := getPayload(a) require.NoError(t, err) // Check the scrubbing @@ -934,3 +954,348 @@ func TestGetAsJSONScrub(t *testing.T) { require.True(t, ok) assert.Equal(t, "test", metric.(MetricPayload).Tags["text"]) } + +func TestAdjustPrometheusCounterValue(t *testing.T) { + var c = ` + agent_telemetry: + enabled: true + profiles: + - name: xxx + metric: + metrics: + - name: foo.bar + aggregate_tags: + - tag1 + - tag2 + - name: foo.cat + aggregate_tags: + - tag + - name: zoo.bar + aggregate_tags: + - tag1 + - tag2 + - name: zoo.cat + ` + + // setup and initiate atel + tel := makeTelMock(t) + o := convertYamlStrToMap(t, c) + s := makeSenderImpl(t, c) + r := newRunnerMock() + a := getTestAtel(t, tel, o, s, nil, r) + require.True(t, a.enabled) + + // setup metrics using few family names, metric names and tag- and tag-less counters + // to test various scenarios + counter1 := tel.NewCounter("foo", "bar", []string{"tag1", "tag2"}, "") + counter2 := tel.NewCounter("foo", "cat", []string{"tag"}, "") + counter3 := tel.NewCounter("zoo", "bar", []string{"tag1", "tag2"}, "") + counter4 := tel.NewCounter("zoo", "cat", nil, "") + + // First addition (expected values should be the same as the added values) + counter1.AddWithTags(1, map[string]string{"tag1": "tag1val", "tag2": "tag2val"}) + counter2.AddWithTags(2, map[string]string{"tag": "tagval"}) + counter3.AddWithTags(3, map[string]string{"tag1": "tag1val", "tag2": "tag2val"}) + counter4.Add(4) + payload1, err1 := getPayload(a) + require.NoError(t, err1) + metrics1 := payload1.Payload.(AgentMetricsPayload).Metrics + expecVals1 := map[string]float64{ + 
"foo.bar": 1.0, + "foo.cat": 2.0, + "zoo.bar": 3.0, + "zoo.cat": 4.0, + } + for ek, ev := range expecVals1 { + v, ok := metrics1[ek] + require.True(t, ok) + assert.Equal(t, ev, v.(MetricPayload).Value) + } + + // Second addition (expected values should be the same as the added values) + counter1.AddWithTags(10, map[string]string{"tag1": "tag1val", "tag2": "tag2val"}) + counter2.AddWithTags(20, map[string]string{"tag": "tagval"}) + counter3.AddWithTags(30, map[string]string{"tag1": "tag1val", "tag2": "tag2val"}) + counter4.Add(40) + payload2, err2 := getPayload(a) + require.NoError(t, err2) + metrics2 := payload2.Payload.(AgentMetricsPayload).Metrics + expecVals2 := map[string]float64{ + "foo.bar": 10.0, + "foo.cat": 20.0, + "zoo.bar": 30.0, + "zoo.cat": 40.0, + } + for ek, ev := range expecVals2 { + v, ok := metrics2[ek] + require.True(t, ok) + assert.Equal(t, ev, v.(MetricPayload).Value) + } + + // Third and fourth addition (expected values should be the sum of 3rd and 4th values) + counter1.AddWithTags(100, map[string]string{"tag1": "tag1val", "tag2": "tag2val"}) + counter2.AddWithTags(200, map[string]string{"tag": "tagval"}) + counter3.AddWithTags(300, map[string]string{"tag1": "tag1val", "tag2": "tag2val"}) + counter4.Add(400) + counter1.AddWithTags(1000, map[string]string{"tag1": "tag1val", "tag2": "tag2val"}) + counter2.AddWithTags(2000, map[string]string{"tag": "tagval"}) + counter3.AddWithTags(3000, map[string]string{"tag1": "tag1val", "tag2": "tag2val"}) + counter4.Add(4000) + payload34, err34 := getPayload(a) + require.NoError(t, err34) + metrics34 := payload34.Payload.(AgentMetricsPayload).Metrics + expecVals34 := map[string]float64{ + "foo.bar": 1100.0, + "foo.cat": 2200.0, + "zoo.bar": 3300.0, + "zoo.cat": 4400.0, + } + for ek, ev := range expecVals34 { + v, ok := metrics34[ek] + require.True(t, ok) + assert.Equal(t, ev, v.(MetricPayload).Value) + } + + // No addition (expected values should be zero) + payload5, err5 := getPayload(a) + 
require.NoError(t, err5) + metrics5 := payload5.Payload.(AgentMetricsPayload).Metrics + expecVals5 := map[string]float64{ + "foo.bar": 0.0, + "foo.cat": 0.0, + "zoo.bar": 0.0, + "zoo.cat": 0.0, + } + for ek, ev := range expecVals5 { + v, ok := metrics5[ek] + require.True(t, ok) + assert.Equal(t, ev, v.(MetricPayload).Value) + } +} + +func TestHistogramFloatUpperBoundNormalization(t *testing.T) { + var c = ` + agent_telemetry: + enabled: true + profiles: + - name: xxx + metric: + metrics: + - name: foo.bar + ` + + // setup and initiate atel + tel := makeTelMock(t) + o := convertYamlStrToMap(t, c) + s := makeSenderImpl(t, c) + r := newRunnerMock() + a := getTestAtel(t, tel, o, s, nil, r) + require.True(t, a.enabled) + + // setup and initiate atel + hist := tel.NewHistogram("foo", "bar", nil, "", []float64{1, 2, 5, 100}) + // bucket 0 - 5 + hist.Observe(1) + hist.Observe(1) + hist.Observe(1) + hist.Observe(1) + hist.Observe(1) + // bucket 1 - 0 + // .. + // bucket 2 - 3 + hist.Observe(5) + hist.Observe(5) + hist.Observe(5) + // bucket 4 - 6 + hist.Observe(6) + hist.Observe(100) + hist.Observe(100) + hist.Observe(100) + hist.Observe(100) + hist.Observe(100) + + // Test payload1 + metric1, ok := getPayloadMetric(a, "foo.bar") + require.True(t, ok) + require.True(t, len(metric1.Buckets) > 0) + expecVals1 := map[string]uint64{ + "1": 5, + "2": 0, + "5": 3, + "100": 6, + } + for k, b := range metric1.Buckets { + assert.Equal(t, expecVals1[k], b) + } + + // Test payload2 (no new observations, everything is reset) + metric2, ok := getPayloadMetric(a, "foo.bar") + require.True(t, ok) + require.True(t, len(metric2.Buckets) > 0) + expecVals2 := map[string]uint64{ + "1": 0, + "2": 0, + "5": 0, + "100": 0, + } + for k, b := range metric2.Buckets { + assert.Equal(t, expecVals2[k], b) + } + + // Repeat the same observation with the same results) + // bucket 0 - 5 + hist.Observe(1) + hist.Observe(1) + hist.Observe(1) + hist.Observe(1) + hist.Observe(1) + // bucket 1 - 0 + // .. 
+ // bucket 2 - 3 + hist.Observe(5) + hist.Observe(5) + hist.Observe(5) + // bucket 4 - 6 + hist.Observe(6) + hist.Observe(100) + hist.Observe(100) + hist.Observe(100) + hist.Observe(100) + hist.Observe(100) + // Test payload3 + metric3, ok := getPayloadMetric(a, "foo.bar") + require.True(t, ok) + require.True(t, len(metric3.Buckets) > 0) + expecVals3 := map[string]uint64{ + "1": 5, + "2": 0, + "5": 3, + "100": 6, + } + for k, b := range metric3.Buckets { + assert.Equal(t, expecVals3[k], b) + } + + // Test raw buckets, they should be still accumulated + rawHist := hist.WithTags(nil) + expecVals4 := []uint64{10, 10, 16, 28} + for i, b := range rawHist.Get().Buckets { + assert.Equal(t, expecVals4[i], b.Count) + } +} + +// The same as above but with tags (to make sure that indexing with tags works) +func TestHistogramFloatUpperBoundNormalizationWithTags(t *testing.T) { + var c = ` + agent_telemetry: + enabled: true + profiles: + - name: xxx + metric: + metrics: + - name: foo.bar + aggregate_tags: + - tag1 + - tag2 + ` + + // setup and initiate atel + tel := makeTelMock(t) + o := convertYamlStrToMap(t, c) + s := makeSenderImpl(t, c) + r := newRunnerMock() + a := getTestAtel(t, tel, o, s, nil, r) + require.True(t, a.enabled) + + // setup and initiate atel + hist := tel.NewHistogram("foo", "bar", []string{"tag1", "tag2"}, "", []float64{1, 2, 5, 100}) + // bucket 0 - 5 + hist.Observe(1, "val1", "val2") + hist.Observe(1, "val1", "val2") + hist.Observe(1, "val1", "val2") + hist.Observe(1, "val1", "val2") + hist.Observe(1, "val1", "val2") + // bucket 1 - 0 + // .. 
+ // bucket 2 - 3 + hist.Observe(5, "val1", "val2") + hist.Observe(5, "val1", "val2") + hist.Observe(5, "val1", "val2") + // bucket 4 - 6 + hist.Observe(6, "val1", "val2") + hist.Observe(100, "val1", "val2") + hist.Observe(100, "val1", "val2") + hist.Observe(100, "val1", "val2") + hist.Observe(100, "val1", "val2") + hist.Observe(100, "val1", "val2") + + // Test payload1 + metric1, ok := getPayloadMetric(a, "foo.bar") + require.True(t, ok) + require.True(t, len(metric1.Buckets) > 0) + expecVals1 := map[string]uint64{ + "1": 5, + "2": 0, + "5": 3, + "100": 6, + } + for k, b := range metric1.Buckets { + assert.Equal(t, expecVals1[k], b) + } + + // Test payload2 (no new observations, everything is reset) + metric2, ok := getPayloadMetric(a, "foo.bar") + require.True(t, ok) + require.True(t, len(metric2.Buckets) > 0) + expecVals2 := map[string]uint64{ + "1": 0, + "2": 0, + "5": 0, + "100": 0, + } + for k, b := range metric2.Buckets { + assert.Equal(t, expecVals2[k], b) + } + + // Repeat the same observation with the same results) + // bucket 0 - 5 + hist.Observe(1, "val1", "val2") + hist.Observe(1, "val1", "val2") + hist.Observe(1, "val1", "val2") + hist.Observe(1, "val1", "val2") + hist.Observe(1, "val1", "val2") + // bucket 1 - 0 + // .. 
+ // bucket 2 - 3 + hist.Observe(5, "val1", "val2") + hist.Observe(5, "val1", "val2") + hist.Observe(5, "val1", "val2") + // bucket 4 - 6 + hist.Observe(6, "val1", "val2") + hist.Observe(100, "val1", "val2") + hist.Observe(100, "val1", "val2") + hist.Observe(100, "val1", "val2") + hist.Observe(100, "val1", "val2") + hist.Observe(100, "val1", "val2") + // Test payload3 + metric3, ok := getPayloadMetric(a, "foo.bar") + require.True(t, ok) + require.True(t, len(metric3.Buckets) > 0) + expecVals3 := map[string]uint64{ + "1": 5, + "2": 0, + "5": 3, + "100": 6, + } + for k, b := range metric3.Buckets { + assert.Equal(t, expecVals3[k], b) + } + + // Test raw buckets, they should be still accumulated + tags := map[string]string{"tag1": "val1", "tag2": "val2"} + rawHist := hist.WithTags(tags) + expecVals4 := []uint64{10, 10, 16, 28} + for i, b := range rawHist.Get().Buckets { + assert.Equal(t, expecVals4[i], b.Count) + } +} diff --git a/comp/core/agenttelemetry/impl/sender.go b/comp/core/agenttelemetry/impl/sender.go index d715863f862e4..585807eb6a539 100644 --- a/comp/core/agenttelemetry/impl/sender.go +++ b/comp/core/agenttelemetry/impl/sender.go @@ -14,6 +14,7 @@ import ( "net/http" "net/url" "strconv" + "strings" "time" dto "github.com/prometheus/client_model/go" @@ -131,7 +132,7 @@ type MetricPayload struct { Value float64 `json:"value"` Type string `json:"type"` Tags map[string]interface{} `json:"tags,omitempty"` - Buckets map[string]interface{} `json:"buckets,omitempty"` + Buckets map[string]uint64 `json:"buckets,omitempty"` } func httpClientFactory(cfg config.Reader, timeout time.Duration) func() *http.Client { @@ -240,17 +241,18 @@ func (s *senderImpl) addMetricPayload( metricType := metricFamily.GetType() switch metricType { case dto.MetricType_COUNTER: - payload.Type = "monotonic" + payload.Type = "counter" payload.Value = metric.GetCounter().GetValue() case dto.MetricType_GAUGE: payload.Type = "gauge" payload.Value = metric.GetGauge().GetValue() case 
dto.MetricType_HISTOGRAM: payload.Type = "histogram" - payload.Buckets = make(map[string]interface{}, 0) + payload.Buckets = make(map[string]uint64, 0) histogram := metric.GetHistogram() for _, bucket := range histogram.GetBucket() { - boundName := fmt.Sprintf("upperbound_%v", bucket.GetUpperBound()) + boundNameRaw := fmt.Sprintf("%v", bucket.GetUpperBound()) + boundName := strings.ReplaceAll(boundNameRaw, ".", "_") payload.Buckets[boundName] = bucket.GetCumulativeCount() } } @@ -319,7 +321,7 @@ func (s *senderImpl) flushSession(ss *senderSession) error { return fmt.Errorf("failed to marshal agent telemetry payload: %w", err) } - reqBody, err := scrubber.ScrubBytes(payloadJSON) + reqBody, err := scrubber.ScrubJSON(payloadJSON) if err != nil { return fmt.Errorf("failed to scrubl agent telemetry payload: %w", err) } diff --git a/comp/core/tagger/collectors/workloadmeta_extract.go b/comp/core/tagger/collectors/workloadmeta_extract.go index a3a14688f45cf..0ecf41398b582 100644 --- a/comp/core/tagger/collectors/workloadmeta_extract.go +++ b/comp/core/tagger/collectors/workloadmeta_extract.go @@ -225,8 +225,10 @@ func (c *WorkloadMetaCollector) handleContainer(ev workloadmeta.Event) []*types. 
} // static tags for ECS and EKS Fargate containers - for tag, value := range c.staticTags { - tagList.AddLow(tag, value) + for tag, valueList := range c.staticTags { + for _, value := range valueList { + tagList.AddLow(tag, value) + } } // gpu tags from container resource requests @@ -399,8 +401,10 @@ func (c *WorkloadMetaCollector) extractTagsFromPodEntity(pod *workloadmeta.Kuber } // static tags for EKS Fargate pods - for tag, value := range c.staticTags { - tagList.AddLow(tag, value) + for tag, valueList := range c.staticTags { + for _, value := range valueList { + tagList.AddLow(tag, value) + } } low, orch, high, standard := tagList.Compute() diff --git a/comp/core/tagger/collectors/workloadmeta_main.go b/comp/core/tagger/collectors/workloadmeta_main.go index 23c5973e363e4..59d82862ddf02 100644 --- a/comp/core/tagger/collectors/workloadmeta_main.go +++ b/comp/core/tagger/collectors/workloadmeta_main.go @@ -57,7 +57,7 @@ type WorkloadMetaCollector struct { containerEnvAsTags map[string]string containerLabelsAsTags map[string]string - staticTags map[string]string + staticTags map[string][]string // for ECS and EKS Fargate k8sResourcesAnnotationsAsTags map[string]map[string]string k8sResourcesLabelsAsTags map[string]map[string]string globContainerLabels map[string]glob.Glob @@ -91,42 +91,51 @@ func (c *WorkloadMetaCollector) initK8sResourcesMetaAsTags(resourcesLabelsAsTags // Run runs the continuous event watching loop and sends new tags to the // tagger based on the events sent by the workloadmeta. 
-func (c *WorkloadMetaCollector) Run(ctx context.Context) { - c.collectStaticGlobalTags(ctx) +func (c *WorkloadMetaCollector) Run(ctx context.Context, datadogConfig config.Component) { + c.collectStaticGlobalTags(ctx, datadogConfig) c.stream(ctx) } -func (c *WorkloadMetaCollector) collectStaticGlobalTags(ctx context.Context) { +func (c *WorkloadMetaCollector) collectStaticGlobalTags(ctx context.Context, datadogConfig config.Component) { c.staticTags = util.GetStaticTags(ctx) if _, exists := c.staticTags[clusterTagNamePrefix]; flavor.GetFlavor() == flavor.ClusterAgent && !exists { // If we are running the cluster agent, we want to set the kube_cluster_name tag as a global tag if we are able // to read it, for the instances where we are running in an environment where hostname cannot be detected. if cluster := clustername.GetClusterNameTagValue(ctx, ""); cluster != "" { if c.staticTags == nil { - c.staticTags = make(map[string]string, 1) + c.staticTags = make(map[string][]string, 1) } - c.staticTags[clusterTagNamePrefix] = cluster + if _, exists := c.staticTags[clusterTagNamePrefix]; !exists { + c.staticTags[clusterTagNamePrefix] = []string{} + } + c.staticTags[clusterTagNamePrefix] = append(c.staticTags[clusterTagNamePrefix], cluster) } } - if len(c.staticTags) > 0 { - tags := taglist.NewTagList() + // These are the global tags that should only be applied to the internal global entity on DCA. + // Whereas the static tags are applied to containers and pods directly as well. 
+ globalEnvTags := util.GetGlobalEnvTags(datadogConfig) - for tag, value := range c.staticTags { - tags.AddLow(tag, value) - } + tagList := taglist.NewTagList() - low, orch, high, standard := tags.Compute() - c.tagProcessor.ProcessTagInfo([]*types.TagInfo{ - { - Source: staticSource, - EntityID: common.GetGlobalEntityID(), - HighCardTags: high, - OrchestratorCardTags: orch, - LowCardTags: low, - StandardTags: standard, - }, - }) + for _, tags := range []map[string][]string{c.staticTags, globalEnvTags} { + for tagKey, valueList := range tags { + for _, value := range valueList { + tagList.AddLow(tagKey, value) + } + } } + + low, orch, high, standard := tagList.Compute() + c.tagProcessor.ProcessTagInfo([]*types.TagInfo{ + { + Source: staticSource, + EntityID: common.GetGlobalEntityID(), + HighCardTags: high, + OrchestratorCardTags: orch, + LowCardTags: low, + StandardTags: standard, + }, + }) } func (c *WorkloadMetaCollector) stream(ctx context.Context) { diff --git a/comp/core/tagger/collectors/workloadmeta_test.go b/comp/core/tagger/collectors/workloadmeta_test.go index 48d8ea9691f37..93c2cf153d579 100644 --- a/comp/core/tagger/collectors/workloadmeta_test.go +++ b/comp/core/tagger/collectors/workloadmeta_test.go @@ -116,7 +116,7 @@ func TestHandleKubePod(t *testing.T) { tests := []struct { name string - staticTags map[string]string + staticTags map[string][]string k8sResourcesAnnotationsAsTags map[string]map[string]string k8sResourcesLabelsAsTags map[string]map[string]string pod workloadmeta.KubernetesPod @@ -789,8 +789,8 @@ func TestHandleKubePod(t *testing.T) { }, { name: "static tags", - staticTags: map[string]string{ - "eks_fargate_node": "foobar", + staticTags: map[string][]string{ + "eks_fargate_node": {"foobar"}, }, pod: workloadmeta.KubernetesPod{ EntityID: podEntityID, @@ -961,7 +961,7 @@ func TestHandleKubePodWithoutPvcAsTags(t *testing.T) { tests := []struct { name string - staticTags map[string]string + staticTags map[string][]string labelsAsTags 
map[string]string annotationsAsTags map[string]string nsLabelsAsTags map[string]string @@ -1117,7 +1117,7 @@ func TestHandleKubePodNoContainerName(t *testing.T) { tests := []struct { name string - staticTags map[string]string + staticTags map[string][]string labelsAsTags map[string]string annotationsAsTags map[string]string nsLabelsAsTags map[string]string @@ -1617,7 +1617,7 @@ func TestHandleContainer(t *testing.T) { tests := []struct { name string - staticTags map[string]string + staticTags map[string][]string labelsAsTags map[string]string envAsTags map[string]string container workloadmeta.Container @@ -2098,8 +2098,8 @@ func TestHandleContainer(t *testing.T) { }, { name: "static tags", - staticTags: map[string]string{ - "eks_fargate_node": "foobar", + staticTags: map[string][]string{ + "eks_fargate_node": {"foobar"}, }, container: workloadmeta.Container{ EntityID: entityID, @@ -2405,6 +2405,48 @@ func TestHandlePodWithDeletedContainer(t *testing.T) { assert.True(t, found, "TagInfo of deleted container not returned") } +func TestNoGlobalTags(t *testing.T) { + // This test checks that the tagger doesn't set any global entity tags on node agent + + mockConfig := configmock.New(t) + collectorCh := make(chan []*types.TagInfo, 10) + fakeProcessor := &fakeProcessor{ch: collectorCh} + + // Global tags that SHOULD NOT be stored in the tagger's global entity + mockConfig.SetWithoutSource("tags", []string{"some:tag"}) + mockConfig.SetWithoutSource("extra_tags", []string{"extra:tag"}) + mockConfig.SetWithoutSource("cluster_checks.extra_tags", []string{"cluster:tag"}) + mockConfig.SetWithoutSource("orchestrator_explorer.extra_tags", []string{"orch:tag"}) + + wmetaCollector := NewWorkloadMetaCollector(context.Background(), mockConfig, nil, fakeProcessor) + wmetaCollector.collectStaticGlobalTags(context.Background(), mockConfig) + + close(collectorCh) + + expectedEmptyEvent := &types.TagInfo{ + Source: staticSource, + EntityID: common.GetGlobalEntityID(), + HighCardTags: 
[]string{}, + OrchestratorCardTags: []string{}, + LowCardTags: []string{}, + StandardTags: []string{}, + } + + var actualStaticSourceEvent *types.TagInfo + for evBundle := range collectorCh { + for _, event := range evBundle { + if event.Source == staticSource { + actualStaticSourceEvent = event + break + } + } + } + assert.True(t, reflect.DeepEqual(actualStaticSourceEvent, expectedEmptyEvent), + "Global Entity should be set with no tags:\nexpected: %v\nfound: %v ", + expectedEmptyEvent, actualStaticSourceEvent, + ) +} + func TestParseJSONValue(t *testing.T) { tests := []struct { name string diff --git a/comp/core/tagger/impl/local_tagger.go b/comp/core/tagger/impl/local_tagger.go index 8b6e524b0c577..acde0b3379b0b 100644 --- a/comp/core/tagger/impl/local_tagger.go +++ b/comp/core/tagger/impl/local_tagger.go @@ -60,7 +60,7 @@ func (t *localTagger) Start(ctx context.Context) error { ) go t.tagStore.Run(t.ctx) - go t.collector.Run(t.ctx) + go t.collector.Run(t.ctx, t.cfg) return nil } diff --git a/comp/core/workloadmeta/collectors/internal/containerd/image_sbom_trivy.go b/comp/core/workloadmeta/collectors/internal/containerd/image_sbom_trivy.go index 9867d2a6cb093..ccda07525ba30 100644 --- a/comp/core/workloadmeta/collectors/internal/containerd/image_sbom_trivy.go +++ b/comp/core/workloadmeta/collectors/internal/containerd/image_sbom_trivy.go @@ -138,11 +138,11 @@ func convertScanResultToSBOM(result sbom.ScanResult) *workloadmeta.SBOM { var report *cyclonedx.BOM if result.Error != nil { - log.Errorf("Failed to generate SBOM for containerd image: %s", result.Error) + log.Debugf("Failed to generate SBOM for containerd image: %s", result.Error) status = workloadmeta.Failed reportedError = result.Error.Error() } else if bom, err := result.Report.ToCycloneDX(); err != nil { - log.Errorf("Failed to extract SBOM from report") + log.Debugf("Failed to extract SBOM from report") status = workloadmeta.Failed reportedError = err.Error() } else { diff --git 
a/comp/core/workloadmeta/collectors/internal/crio/containers.go b/comp/core/workloadmeta/collectors/internal/crio/containers.go index 8076b4dc4e0d8..7dab17440f43a 100644 --- a/comp/core/workloadmeta/collectors/internal/crio/containers.go +++ b/comp/core/workloadmeta/collectors/internal/crio/containers.go @@ -5,7 +5,6 @@ //go:build crio -// Package crio implements the crio Workloadmeta collector. package crio import ( @@ -17,20 +16,19 @@ import ( v1 "k8s.io/cri-api/pkg/apis/runtime/v1" workloadmeta "github.com/DataDog/datadog-agent/comp/core/workloadmeta/def" - pkgconfigsetup "github.com/DataDog/datadog-agent/pkg/config/setup" "github.com/DataDog/datadog-agent/pkg/util/crio" "github.com/DataDog/datadog-agent/pkg/util/log" ) -// convertToEvent converts a CRI-O container to a workloadmeta event. -func (c *collector) convertToEvent(ctx context.Context, ctr *v1.Container) workloadmeta.CollectorEvent { +// convertContainerToEvent converts a CRI-O container to a workloadmeta event. +func (c *collector) convertContainerToEvent(ctx context.Context, ctr *v1.Container) workloadmeta.CollectorEvent { name := getContainerName(ctr.GetMetadata()) namespace := getPodNamespace(ctx, c.client, ctr.GetPodSandboxId()) containerStatus, info := getContainerStatus(ctx, c.client, ctr.GetId()) pid, hostname, cgroupsPath := parseContainerInfo(info) cpuLimit, memLimit := getResourceLimits(containerStatus, info) image := getContainerImage(ctx, c.client, ctr.GetImage()) - ports := extractPortsFromAnnotations(ctr.GetAnnotations()) + ports := parsePortsFromAnnotations(ctr.GetAnnotations()) return workloadmeta.CollectorEvent{ Type: workloadmeta.EventTypeSet, @@ -61,15 +59,6 @@ func (c *collector) convertToEvent(ctx context.Context, ctr *v1.Container) workl } } -// getCRIOSocketPath returns the configured CRI-O socket path or the default path. 
-func getCRIOSocketPath() string { - criSocket := pkgconfigsetup.Datadog().GetString("cri_socket_path") - if criSocket == "" { - return defaultCrioSocketPath - } - return criSocket -} - // getContainerName retrieves the container name. func getContainerName(containerMetadata *v1.ContainerMetadata) string { if containerMetadata == nil { @@ -93,88 +82,30 @@ func getContainerStatus(ctx context.Context, client crio.Client, containerID str statusResponse, err := client.GetContainerStatus(ctx, containerID) if err != nil || statusResponse.GetStatus() == nil { log.Errorf("Failed to get container status for container %s: %v", containerID, err) - return &v1.ContainerStatus{State: v1.ContainerState_CONTAINER_UNKNOWN}, make(map[string]string) + return nil, nil } status := statusResponse.GetStatus() info := statusResponse.GetInfo() return status, info } -// getResourceLimits extracts CPU and memory limits from container status or info as a fallback. -func getResourceLimits(containerStatus *v1.ContainerStatus, info map[string]string) (*float64, *uint64) { - // First, try to get resources from containerStatus - if containerStatus != nil && containerStatus.GetResources() != nil && containerStatus.GetResources().GetLinux() != nil { - var cpuLimit *float64 - var memLimit *uint64 - cpuPeriod := float64(containerStatus.GetResources().GetLinux().GetCpuPeriod()) - cpuQuota := float64(containerStatus.GetResources().GetLinux().GetCpuQuota()) - memLimitInBytes := uint64(containerStatus.GetResources().GetLinux().GetMemoryLimitInBytes()) - - if cpuPeriod != 0 && cpuQuota != 0 { - limit := cpuQuota / cpuPeriod - cpuLimit = &limit - } - if memLimitInBytes != 0 { - memLimit = &memLimitInBytes - } - return cpuLimit, memLimit - } - - if info == nil || info["info"] == "" { - log.Warn("Info map is nil or does not contain resource information") - return nil, nil - } - - // Fallback to parsing resources from info if status resources are nil - var parsedInfo struct { - RuntimeSpec struct { - Linux 
struct { - Resources struct { - CPU struct { - Quota int64 `json:"quota"` - Period int64 `json:"period"` - } `json:"cpu"` - Memory struct { - LimitInBytes int64 `json:"memoryLimitInBytes"` - } `json:"memory"` - } `json:"resources"` - } `json:"linux"` - } `json:"runtimeSpec"` - } - - if err := json.Unmarshal([]byte(info["info"]), &parsedInfo); err != nil { - log.Warnf("Failed to parse resources from container info: %v", err) - return nil, nil - } - - cpuPeriod := float64(parsedInfo.RuntimeSpec.Linux.Resources.CPU.Period) - cpuQuota := float64(parsedInfo.RuntimeSpec.Linux.Resources.CPU.Quota) - memLimitInBytes := uint64(parsedInfo.RuntimeSpec.Linux.Resources.Memory.LimitInBytes) - - var cpuLimit *float64 - var memLimit *uint64 - if cpuPeriod != 0 && cpuQuota != 0 { - limit := cpuQuota / cpuPeriod - cpuLimit = &limit - } - if memLimitInBytes != 0 { - memLimit = &memLimitInBytes - } - return cpuLimit, memLimit -} - // getContainerImage retrieves and converts a container image to workloadmeta format. 
func getContainerImage(ctx context.Context, client crio.Client, imageSpec *v1.ImageSpec) workloadmeta.ContainerImage { if imageSpec == nil { log.Warn("Image spec is nil, cannot fetch image") return workloadmeta.ContainerImage{} } - image, err := client.GetContainerImage(ctx, imageSpec) - if err != nil || image == nil { - log.Warnf("Failed to fetch image: %v", err) + imageResp, err := client.GetContainerImage(ctx, imageSpec, false) + if err != nil || imageResp == nil || imageResp.GetImage() == nil { + log.Warnf( + "Failed to fetch image, err: %v, imageResp is nil: %v, imageResp.GetImage() is nil: %v", + err, + imageResp == nil, + imageResp != nil && imageResp.GetImage() == nil, + ) return workloadmeta.ContainerImage{} } - + image := imageResp.GetImage() imgID := image.GetId() imgName := "" if len(image.GetRepoTags()) > 0 { @@ -182,9 +113,17 @@ func getContainerImage(ctx context.Context, client crio.Client, imageSpec *v1.Im } wmImg, err := workloadmeta.NewContainerImage(imgID, imgName) if err != nil { - log.Warnf("Failed to create image: %v", err) + log.Debugf("Failed to create image: %v", err) return workloadmeta.ContainerImage{} } + + imgIDAsDigest, err := parseDigests(image.GetRepoDigests()) + if err == nil { + wmImg.ID = imgIDAsDigest + } else if sbomCollectionIsEnabled() { + log.Warnf("Failed to parse digest for image with ID %s: %v. 
As a result, SBOM vulnerabilities may not be properly linked to this image.", imgID, err) + } + if len(image.GetRepoDigests()) > 0 { wmImg.RepoDigest = image.GetRepoDigests()[0] } @@ -200,9 +139,9 @@ func getContainerState(containerStatus *v1.ContainerStatus) workloadmeta.Contain return workloadmeta.ContainerState{ Running: containerStatus.GetState() == v1.ContainerState_CONTAINER_RUNNING, Status: mapContainerStatus(containerStatus.GetState()), - CreatedAt: time.Unix(0, containerStatus.GetCreatedAt()), - StartedAt: time.Unix(0, containerStatus.GetStartedAt()), - FinishedAt: time.Unix(0, containerStatus.GetFinishedAt()), + CreatedAt: time.Unix(0, containerStatus.GetCreatedAt()).UTC(), + StartedAt: time.Unix(0, containerStatus.GetStartedAt()).UTC(), + FinishedAt: time.Unix(0, containerStatus.GetFinishedAt()).UTC(), ExitCode: &exitCode, } } @@ -222,8 +161,8 @@ func mapContainerStatus(state v1.ContainerState) workloadmeta.ContainerStatus { return workloadmeta.ContainerStatusUnknown } -// generateUnsetEvent creates an unset event for a given container ID. -func generateUnsetEvent(seenID workloadmeta.EntityID) workloadmeta.CollectorEvent { +// generateUnsetContainerEvent creates an unset event for a given container ID. +func generateUnsetContainerEvent(seenID workloadmeta.EntityID) workloadmeta.CollectorEvent { return workloadmeta.CollectorEvent{ Type: workloadmeta.EventTypeUnset, Source: workloadmeta.SourceRuntime, @@ -233,27 +172,76 @@ func generateUnsetEvent(seenID workloadmeta.EntityID) workloadmeta.CollectorEven } } -// extractPortsFromAnnotations parses container ports from annotations. -func extractPortsFromAnnotations(annotations map[string]string) []workloadmeta.ContainerPort { +// getResourceLimits extracts CPU and memory limits from container status or info as a fallback. 
+func getResourceLimits(containerStatus *v1.ContainerStatus, info map[string]string) (*float64, *uint64) { + // First, try to get resources from containerStatus + if containerStatus != nil && containerStatus.GetResources() != nil && containerStatus.GetResources().GetLinux() != nil { + cpuPeriod := float64(containerStatus.GetResources().GetLinux().GetCpuPeriod()) + cpuQuota := float64(containerStatus.GetResources().GetLinux().GetCpuQuota()) + memLimitInBytes := uint64(containerStatus.GetResources().GetLinux().GetMemoryLimitInBytes()) + + var cpuLimit *float64 + var memLimit *uint64 + + if cpuPeriod != 0 && cpuQuota != 0 { + limit := cpuQuota / cpuPeriod + cpuLimit = &limit + } + if memLimitInBytes != 0 { + memLimit = &memLimitInBytes + } + return cpuLimit, memLimit + } + + // If containerStatus is nil or does not contain resource information, try to get resources from container info + return parseResourceLimitsFromInfo(info) +} + +// parseResourceLimitsFromInfo extracts CPU and memory limits from JSON-encoded container info. +func parseResourceLimitsFromInfo(info map[string]string) (*float64, *uint64) { + if info == nil || info["info"] == "" { + log.Debug("Info map is nil or does not contain resource information") + return nil, nil + } + + var parsed resourceInfo + if err := json.Unmarshal([]byte(info["info"]), &parsed); err != nil { + log.Debugf("Failed to parse resources from container info: %v", err) + return nil, nil + } + + cpuPeriod := float64(parsed.RuntimeSpec.Linux.Resources.CPU.Period) + cpuQuota := float64(parsed.RuntimeSpec.Linux.Resources.CPU.Quota) + memLimitInBytes := uint64(parsed.RuntimeSpec.Linux.Resources.Memory.LimitInBytes) + + var cpuLimit *float64 + var memLimit *uint64 + if cpuPeriod != 0 && cpuQuota != 0 { + limit := cpuQuota / cpuPeriod + cpuLimit = &limit + } + if memLimitInBytes != 0 { + memLimit = &memLimitInBytes + } + return cpuLimit, memLimit +} + +// parsePortsFromAnnotations parses container ports from annotations. 
+func parsePortsFromAnnotations(annotations map[string]string) []workloadmeta.ContainerPort { var wmContainerPorts []workloadmeta.ContainerPort if len(annotations) == 0 { - log.Warn("Annotations are nil or empty") + log.Debug("Annotations are nil or empty") return wmContainerPorts } for key, value := range annotations { if strings.Contains(key, "ports") { - var ports []struct { - Name string `json:"name"` - ContainerPort int `json:"containerPort"` - Protocol string `json:"protocol"` - HostPort uint16 `json:"hostPort"` - } + var ports []portAnnotation if err := json.Unmarshal([]byte(value), &ports); err != nil { - log.Warnf("Failed to parse ports from annotation %s: %v", key, err) - continue //skip to next annotation + log.Debugf("Failed to parse ports from annotation %s: %v", key, err) + continue // skip to next annotation } for _, port := range ports { @@ -271,32 +259,52 @@ func extractPortsFromAnnotations(annotations map[string]string) []workloadmeta.C // parseContainerInfo takes a map[string]string with JSON-encoded data and extracts PID, Hostname, and CgroupsPath. 
func parseContainerInfo(info map[string]string) (int, string, string) { - var pid int - var hostname, cgroupsPath string - if info == nil || info["info"] == "" { - log.Warn("Container info is nil or empty") - return pid, hostname, cgroupsPath + log.Debug("Container info is nil or empty") + return 0, "", "" } - var parsedInfo struct { - PID int `json:"pid"` - RuntimeSpec struct { - Hostname string `json:"hostname"` - Linux struct { - CgroupsPath string `json:"cgroupsPath"` - } `json:"linux"` - } `json:"runtimeSpec"` + var parsed containerInfo + if err := json.Unmarshal([]byte(info["info"]), &parsed); err != nil { + log.Debugf("Failed to parse container info: %v", err) + return 0, "", "" } - // Unmarshal the JSON string into the struct - if err := json.Unmarshal([]byte(info["info"]), &parsedInfo); err == nil { - pid = parsedInfo.PID - hostname = parsedInfo.RuntimeSpec.Hostname - cgroupsPath = parsedInfo.RuntimeSpec.Linux.CgroupsPath - } else { - log.Warnf("Failed to parse container info: %v", err) - } + return parsed.PID, parsed.RuntimeSpec.Hostname, parsed.RuntimeSpec.Linux.CgroupsPath +} + +// resourceInfo contains CPU and memory resource information. +type resourceInfo struct { + RuntimeSpec struct { + Linux struct { + Resources struct { + CPU struct { + Quota int64 `json:"quota"` + Period int64 `json:"period"` + } `json:"cpu"` + Memory struct { + LimitInBytes int64 `json:"memoryLimitInBytes"` + } `json:"memory"` + } `json:"resources"` + } `json:"linux"` + } `json:"runtimeSpec"` +} + +// portAnnotation contains container port information. +type portAnnotation struct { + Name string `json:"name"` + ContainerPort int `json:"containerPort"` + Protocol string `json:"protocol"` + HostPort uint16 `json:"hostPort"` +} - return pid, hostname, cgroupsPath +// containerInfo contains additional container information. 
+type containerInfo struct { + PID int `json:"pid"` + RuntimeSpec struct { + Hostname string `json:"hostname"` + Linux struct { + CgroupsPath string `json:"cgroupsPath"` + } `json:"linux"` + } `json:"runtimeSpec"` } diff --git a/comp/core/workloadmeta/collectors/internal/crio/crio.go b/comp/core/workloadmeta/collectors/internal/crio/crio.go index e7ccff6d4b6ae..b4fd38a5307c2 100644 --- a/comp/core/workloadmeta/collectors/internal/crio/crio.go +++ b/comp/core/workloadmeta/collectors/internal/crio/crio.go @@ -10,37 +10,42 @@ package crio import ( "context" + "fmt" + "os" "go.uber.org/fx" workloadmeta "github.com/DataDog/datadog-agent/comp/core/workloadmeta/def" "github.com/DataDog/datadog-agent/pkg/config/env" + pkgconfigsetup "github.com/DataDog/datadog-agent/pkg/config/setup" dderrors "github.com/DataDog/datadog-agent/pkg/errors" + "github.com/DataDog/datadog-agent/pkg/sbom/scanner" "github.com/DataDog/datadog-agent/pkg/util/crio" "github.com/DataDog/datadog-agent/pkg/util/log" ) const ( - collectorID = "crio" - componentName = "workloadmeta-crio" - defaultCrioSocketPath = "/var/run/crio/crio.sock" + collectorID = "crio" + componentName = "workloadmeta-crio" ) type collector struct { - id string - client crio.Client - store workloadmeta.Component - catalog workloadmeta.AgentType - seen map[workloadmeta.EntityID]struct{} + id string + client crio.Client + store workloadmeta.Component + catalog workloadmeta.AgentType + seenContainers map[workloadmeta.EntityID]struct{} + seenImages map[workloadmeta.EntityID]struct{} + sbomScanner *scanner.Scanner //nolint: unused } // NewCollector initializes a new CRI-O collector. 
func NewCollector() (workloadmeta.CollectorProvider, error) { return workloadmeta.CollectorProvider{ Collector: &collector{ - id: collectorID, - seen: make(map[workloadmeta.EntityID]struct{}), - catalog: workloadmeta.NodeAgent | workloadmeta.ProcessAgent, + id: collectorID, + seenContainers: make(map[workloadmeta.EntityID]struct{}), + catalog: workloadmeta.NodeAgent | workloadmeta.ProcessAgent, }, }, nil } @@ -51,20 +56,28 @@ func GetFxOptions() fx.Option { } // Start initializes the collector for workloadmeta. -func (c *collector) Start(_ context.Context, store workloadmeta.Component) error { +func (c *collector) Start(ctx context.Context, store workloadmeta.Component) error { if !env.IsFeaturePresent(env.Crio) { return dderrors.NewDisabled(componentName, "Crio not detected") } c.store = store - criSocket := getCRIOSocketPath() - client, err := crio.NewCRIOClient(criSocket) + client, err := crio.NewCRIOClient() if err != nil { - log.Errorf("CRI-O client creation failed for socket %s: %v", criSocket, err) - client.Close() - return err + return fmt.Errorf("CRI-O client creation failed: %v", err) } c.client = client + + if err := c.startSBOMCollection(ctx); err != nil { + return fmt.Errorf("SBOM collection initialization failed: %v", err) + } + + if imageMetadataCollectionIsEnabled() { + if err := checkOverlayImageDirectoryExists(); err != nil { + log.Warnf("Overlay image directory check failed: %v", err) + } + } + return nil } @@ -72,24 +85,61 @@ func (c *collector) Start(_ context.Context, store workloadmeta.Component) error func (c *collector) Pull(ctx context.Context) error { containers, err := c.client.GetAllContainers(ctx) if err != nil { - log.Errorf("Failed to pull container list: %v", err) - return err + return fmt.Errorf("failed to pull container list: %v", err) } - seen := make(map[workloadmeta.EntityID]struct{}) - events := make([]workloadmeta.CollectorEvent, 0, len(containers)) + seenContainers := make(map[workloadmeta.EntityID]struct{}) + seenImages := 
make(map[workloadmeta.EntityID]struct{}) + containerEvents := make([]workloadmeta.CollectorEvent, 0, len(containers)) + imageEvents := make([]workloadmeta.CollectorEvent, 0, len(containers)) + + collectImages := imageMetadataCollectionIsEnabled() + for _, container := range containers { - event := c.convertToEvent(ctx, container) - seen[event.Entity.GetID()] = struct{}{} - events = append(events, event) + // Generate container event + containerEvent := c.convertContainerToEvent(ctx, container) + seenContainers[containerEvent.Entity.GetID()] = struct{}{} + containerEvents = append(containerEvents, containerEvent) + + // Skip image collection if the condition is not met + if !collectImages { + continue + } + + imageEvent, err := c.generateImageEventFromContainer(ctx, container) + if err != nil { + log.Warnf("Image event generation failed for container %+v: %v", container, err) + continue + } + + imageID := imageEvent.Entity.GetID() + seenImages[imageID] = struct{}{} + imageEvents = append(imageEvents, *imageEvent) } - for seenID := range c.seen { - if _, ok := seen[seenID]; !ok { - events = append(events, generateUnsetEvent(seenID)) + + // Handle unset events for images if collecting images + if collectImages { + for seenID := range c.seenImages { + if _, ok := seenImages[seenID]; !ok { + unsetEvent := generateUnsetImageEvent(seenID) + imageEvents = append(imageEvents, *unsetEvent) + } } + c.seenImages = seenImages + c.store.Notify(imageEvents) } - c.seen = seen - c.store.Notify(events) + + // Handle unset events for containers + for seenID := range c.seenContainers { + if _, ok := seenContainers[seenID]; !ok { + unsetEvent := generateUnsetContainerEvent(seenID) + containerEvents = append(containerEvents, unsetEvent) + } + } + + c.seenContainers = seenContainers + c.store.Notify(containerEvents) + return nil } @@ -102,3 +152,24 @@ func (c *collector) GetID() string { func (c *collector) GetTargetCatalog() workloadmeta.AgentType { return c.catalog } + +// 
imageMetadataCollectionIsEnabled checks if image metadata collection is enabled via configuration. +func imageMetadataCollectionIsEnabled() bool { + return pkgconfigsetup.Datadog().GetBool("container_image.enabled") +} + +// sbomCollectionIsEnabled returns true if SBOM collection is enabled. +func sbomCollectionIsEnabled() bool { + return imageMetadataCollectionIsEnabled() && pkgconfigsetup.Datadog().GetBool("sbom.container_image.enabled") +} + +// checkOverlayImageDirectoryExists checks if the overlay-image directory exists. +func checkOverlayImageDirectoryExists() error { + overlayImagePath := crio.GetOverlayImagePath() + if _, err := os.Stat(overlayImagePath); os.IsNotExist(err) { + return fmt.Errorf("overlay-image directory %s does not exist. Ensure this directory is mounted to enable access to layer size and media type", overlayImagePath) + } else if err != nil { + return fmt.Errorf("failed to check overlay-image directory %s: %w. Ensure this directory is mounted to enable access to layer size and media type", overlayImagePath, err) + } + return nil +} diff --git a/comp/core/workloadmeta/collectors/internal/crio/crio_test.go b/comp/core/workloadmeta/collectors/internal/crio/crio_test.go index c2f4b60f316e7..fb6c8a280dbbd 100644 --- a/comp/core/workloadmeta/collectors/internal/crio/crio_test.go +++ b/comp/core/workloadmeta/collectors/internal/crio/crio_test.go @@ -10,470 +10,502 @@ package crio import ( "context" "errors" + "fmt" + "os" "testing" "time" + imgspecs "github.com/opencontainers/image-spec/specs-go/v1" "github.com/stretchr/testify/assert" v1 "k8s.io/cri-api/pkg/apis/runtime/v1" workloadmeta "github.com/DataDog/datadog-agent/comp/core/workloadmeta/def" + "github.com/DataDog/datadog-agent/pkg/util/pointer" ) -// Helper functions to create pointer values for testing -func floatPtr(f float64) *float64 { - return &f -} - -func uintPtr(u uint64) *uint64 { - return &u -} - -// fakeWorkloadmetaStore is a mock implementation of the workloadmeta store. 
-type fakeWorkloadmetaStore struct { - workloadmeta.Component - notifiedEvents []workloadmeta.CollectorEvent -} - -func (store *fakeWorkloadmetaStore) Notify(events []workloadmeta.CollectorEvent) { - store.notifiedEvents = append(store.notifiedEvents, events...) -} - -// fakeCRIOClient simulates the CRI-O client for testing purposes. -type fakeCRIOClient struct { - mockGetAllContainers func(ctx context.Context) ([]*v1.Container, error) - mockGetContainerStatus func(ctx context.Context, containerID string) (*v1.ContainerStatusResponse, error) - mockGetPodStatus func(ctx context.Context, podID string) (*v1.PodSandboxStatus, error) - mockGetContainerImage func(ctx context.Context, imageSpec *v1.ImageSpec) (*v1.Image, error) - mockRuntimeMetadata func(ctx context.Context) (*v1.VersionResponse, error) -} - -func (f *fakeCRIOClient) GetAllContainers(ctx context.Context) ([]*v1.Container, error) { - if f.mockGetAllContainers != nil { - return f.mockGetAllContainers(ctx) - } - return []*v1.Container{}, nil -} - -func (f *fakeCRIOClient) GetContainerStatus(ctx context.Context, containerID string) (*v1.ContainerStatusResponse, error) { - if f.mockGetContainerStatus != nil { - return f.mockGetContainerStatus(ctx, containerID) - } - return &v1.ContainerStatusResponse{}, nil -} - -func (f *fakeCRIOClient) GetPodStatus(ctx context.Context, podID string) (*v1.PodSandboxStatus, error) { - if f.mockGetPodStatus != nil { - return f.mockGetPodStatus(ctx, podID) - } - return &v1.PodSandboxStatus{}, nil -} - -func (f *fakeCRIOClient) GetContainerImage(ctx context.Context, imageSpec *v1.ImageSpec) (*v1.Image, error) { - if f.mockGetContainerImage != nil { - return f.mockGetContainerImage(ctx, imageSpec) - } - return &v1.Image{}, nil -} - -func (f *fakeCRIOClient) RuntimeMetadata(ctx context.Context) (*v1.VersionResponse, error) { - if f.mockRuntimeMetadata != nil { - return f.mockRuntimeMetadata(ctx) - } - return &v1.VersionResponse{RuntimeName: "cri-o", RuntimeVersion: "v1.30.0"}, nil 
-} - -func (f *fakeCRIOClient) Close() error { - return nil -} - -// TestPull verifies that Pull populates container data correctly with PID, Hostname, and CgroupPath. func TestPull(t *testing.T) { - client := &fakeCRIOClient{ - mockGetAllContainers: func(_ context.Context) ([]*v1.Container, error) { - return []*v1.Container{ - {Id: "container1", PodSandboxId: "pod1", Metadata: &v1.ContainerMetadata{Name: "container1"}, Image: &v1.ImageSpec{Image: "myrepo/myimage:latest"}}, - }, nil - }, - mockGetPodStatus: func(_ context.Context, _ string) (*v1.PodSandboxStatus, error) { - return &v1.PodSandboxStatus{Metadata: &v1.PodSandboxMetadata{Namespace: "default"}}, nil - }, - mockGetContainerStatus: func(_ context.Context, _ string) (*v1.ContainerStatusResponse, error) { - return &v1.ContainerStatusResponse{ - Status: &v1.ContainerStatus{ - Metadata: &v1.ContainerMetadata{Name: "container1"}, - State: v1.ContainerState_CONTAINER_RUNNING, - CreatedAt: time.Now().Add(-10 * time.Minute).UnixNano(), - Resources: &v1.ContainerResources{ - Linux: &v1.LinuxContainerResources{ - CpuQuota: 50000, - CpuPeriod: 100000, - MemoryLimitInBytes: 104857600, + + const envVarName = "DD_CONTAINER_IMAGE_ENABLED" + originalValue := os.Getenv(envVarName) + defer os.Setenv(envVarName, originalValue) + + os.Setenv(envVarName, "false") + + createTime := time.Now().Add(-10 * time.Minute).UnixNano() + startTime := time.Now().Add(-5 * time.Minute).UnixNano() + finishTime := time.Now().UnixNano() + + tests := []struct { + name string + mockGetAllContainers func(ctx context.Context) ([]*v1.Container, error) + mockGetPodStatus func(ctx context.Context, podID string) (*v1.PodSandboxStatus, error) + mockGetContainerStatus func(ctx context.Context, containerID string) (*v1.ContainerStatusResponse, error) + mockGetContainerImage func(ctx context.Context, imageSpec *v1.ImageSpec, verbose bool) (*v1.ImageStatusResponse, error) + expectedEvents []workloadmeta.CollectorEvent + expectedError bool + }{ + { + name: 
"Valid container and image data", + mockGetAllContainers: func(_ context.Context) ([]*v1.Container, error) { + return []*v1.Container{ + {Id: "container1", Image: &v1.ImageSpec{Image: "myrepo/myimage:latest"}, PodSandboxId: "pod1", Metadata: &v1.ContainerMetadata{Name: "container1"}}, + }, nil + }, + mockGetPodStatus: func(_ context.Context, _ string) (*v1.PodSandboxStatus, error) { + return &v1.PodSandboxStatus{Metadata: &v1.PodSandboxMetadata{Namespace: "default"}}, nil + }, + mockGetContainerStatus: func(_ context.Context, _ string) (*v1.ContainerStatusResponse, error) { + return &v1.ContainerStatusResponse{ + Status: &v1.ContainerStatus{ + Metadata: &v1.ContainerMetadata{Name: "container1"}, + State: v1.ContainerState_CONTAINER_RUNNING, + CreatedAt: createTime, + StartedAt: startTime, + FinishedAt: finishTime, + Resources: &v1.ContainerResources{ + Linux: &v1.LinuxContainerResources{ + CpuQuota: 50000, + CpuPeriod: 100000, + MemoryLimitInBytes: 104857600, + }, + }, + }, + }, nil + }, + mockGetContainerImage: func(_ context.Context, _ *v1.ImageSpec, _ bool) (*v1.ImageStatusResponse, error) { + return &v1.ImageStatusResponse{ + Image: &v1.Image{ + Id: "image123", + RepoTags: []string{"myrepo/myimage:latest"}, + RepoDigests: []string{"myrepo/myimage@sha256:123abc"}, + }, + }, nil + }, + expectedEvents: []workloadmeta.CollectorEvent{ + { + Type: workloadmeta.EventTypeSet, + Source: workloadmeta.SourceRuntime, + Entity: &workloadmeta.Container{ + EntityID: workloadmeta.EntityID{Kind: workloadmeta.KindContainer, ID: "container1"}, + EntityMeta: workloadmeta.EntityMeta{ + Name: "container1", + Namespace: "default", + }, + Image: workloadmeta.ContainerImage{ + Name: "myrepo/myimage", + ShortName: "myimage", + RawName: "myrepo/myimage:latest", + ID: "sha256:123abc", + Tag: "latest", + RepoDigest: "myrepo/myimage@sha256:123abc", + }, + Resources: workloadmeta.ContainerResources{ + CPULimit: pointer.Ptr(0.5), + MemoryLimit: pointer.Ptr(uint64(104857600)), + }, + Runtime: 
workloadmeta.ContainerRuntimeCRIO, + State: workloadmeta.ContainerState{ + Status: workloadmeta.ContainerStatusRunning, + Running: true, + CreatedAt: time.Unix(0, createTime).UTC(), + StartedAt: time.Unix(0, startTime).UTC(), + FinishedAt: time.Unix(0, finishTime).UTC(), + ExitCode: pointer.Ptr(int64(0)), }, }, }, - Info: map[string]string{ - "info": `{ - "pid": 12345, - "runtimeSpec": { - "hostname": "container-host", - "linux": { - "cgroupsPath": "/crio/crio-45e0df1c6e04fda693f5ef2654363c1ff5667bee7f8a9042ff5c629d48fbcbc4" - } - } - }`, - }, - }, nil - }, - mockGetContainerImage: func(_ context.Context, _ *v1.ImageSpec) (*v1.Image, error) { - return &v1.Image{ - Id: "image123", - RepoTags: []string{"myrepo/myimage:latest"}, - RepoDigests: []string{"myrepo/myimage@sha256:123abc"}, - }, nil - }, - } - - store := &fakeWorkloadmetaStore{} - crioCollector := collector{ - client: client, - store: store, - } - - err := crioCollector.Pull(context.Background()) - assert.NoError(t, err) - assert.NotEmpty(t, store.notifiedEvents) - event := store.notifiedEvents[0] - container := event.Entity.(*workloadmeta.Container) - - assert.Equal(t, "container1", container.EntityMeta.Name) - assert.Equal(t, "default", container.EntityMeta.Namespace) - assert.Equal(t, "container1", container.EntityID.ID) - assert.Equal(t, floatPtr(0.5), container.Resources.CPULimit) - assert.Equal(t, uintPtr(104857600), container.Resources.MemoryLimit) - assert.Equal(t, "myrepo/myimage:latest", container.Image.RawName) - assert.Equal(t, 12345, container.PID) - assert.Equal(t, "container-host", container.Hostname) - assert.Equal(t, "/crio/crio-45e0df1c6e04fda693f5ef2654363c1ff5667bee7f8a9042ff5c629d48fbcbc4", container.CgroupPath) -} - -// TestPullContainerStatusError verifies that Pull handles errors when retrieving container status. 
-func TestPullContainerStatusError(t *testing.T) { - client := &fakeCRIOClient{ - mockGetAllContainers: func(_ context.Context) ([]*v1.Container, error) { - return []*v1.Container{ - {Id: "container1", PodSandboxId: "pod1"}, - }, nil + }, + expectedError: false, }, - mockGetContainerStatus: func(_ context.Context, _ string) (*v1.ContainerStatusResponse, error) { - return nil, errors.New("container status error") - }, - } - - store := &fakeWorkloadmetaStore{} - crioCollector := collector{ - client: client, - store: store, - } - - err := crioCollector.Pull(context.Background()) - assert.NoError(t, err) - assert.Len(t, store.notifiedEvents, 1) - event := store.notifiedEvents[0] - container := event.Entity.(*workloadmeta.Container) - - assert.Equal(t, workloadmeta.ContainerStatusUnknown, container.State.Status) - assert.Empty(t, container.Resources.CPULimit) - assert.Empty(t, container.Resources.MemoryLimit) - assert.Equal(t, 0, container.PID) // Default PID - assert.Equal(t, "", container.Hostname) // Default Hostname - assert.Equal(t, "", container.CgroupPath) // Default CgroupPath -} - -// TestPullNoPodNamespace verifies that Pull handles cases with a missing pod namespace. 
-func TestPullNoPodNamespace(t *testing.T) { - client := &fakeCRIOClient{ - mockGetAllContainers: func(_ context.Context) ([]*v1.Container, error) { - return []*v1.Container{ - {Id: "container1", PodSandboxId: "nonexistent-pod"}, - }, nil - }, - mockGetPodStatus: func(_ context.Context, _ string) (*v1.PodSandboxStatus, error) { - return nil, errors.New("pod not found") - }, - mockGetContainerStatus: func(_ context.Context, _ string) (*v1.ContainerStatusResponse, error) { - return &v1.ContainerStatusResponse{ - Status: &v1.ContainerStatus{ - Metadata: &v1.ContainerMetadata{Name: "container1"}, - State: v1.ContainerState_CONTAINER_RUNNING, - CreatedAt: time.Now().Add(-10 * time.Minute).UnixNano(), - }, - Info: map[string]string{ - "info": `{ - "pid": 12345, - "runtimeSpec": { - "hostname": "container-host", - "linux": { - "cgroupsPath": "/crio/crio-45e0df1c6e04fda693f5ef2654363c1ff5667bee7f8a9042ff5c629d48fbcbc4" + { + name: "Missing resources in container but available in Info", + mockGetAllContainers: func(_ context.Context) ([]*v1.Container, error) { + return []*v1.Container{ + {Id: "container1", PodSandboxId: "pod1"}, + }, nil + }, + mockGetContainerStatus: func(_ context.Context, _ string) (*v1.ContainerStatusResponse, error) { + return &v1.ContainerStatusResponse{ + Status: &v1.ContainerStatus{ + Metadata: &v1.ContainerMetadata{Name: "container1"}, + State: v1.ContainerState_CONTAINER_RUNNING, + CreatedAt: createTime, + StartedAt: startTime, + FinishedAt: finishTime, + }, + Info: map[string]string{ + "info": `{ + "pid": 12345, + "runtimeSpec": { + "hostname": "container-host", + "linux": { + "cgroupsPath": "/crio/crio-45e0df1c6e04fda693f5ef2654363c1ff5667bee7f8a9042ff5c629d48fbcbc4", + "resources": { + "cpu": { + "quota": 50000, + "period": 100000 + }, + "memory": { + "memoryLimitInBytes": 104857600 + } + } + } } - } - }`, + }`, + }, + }, nil + }, + expectedEvents: []workloadmeta.CollectorEvent{ + { + Type: workloadmeta.EventTypeSet, + Source: 
workloadmeta.SourceRuntime, + Entity: &workloadmeta.Container{ + EntityID: workloadmeta.EntityID{Kind: workloadmeta.KindContainer, ID: "container1"}, + Runtime: workloadmeta.ContainerRuntimeCRIO, + State: workloadmeta.ContainerState{ + Status: workloadmeta.ContainerStatusRunning, + Running: true, + CreatedAt: time.Unix(0, createTime).UTC(), + StartedAt: time.Unix(0, startTime).UTC(), + FinishedAt: time.Unix(0, finishTime).UTC(), + ExitCode: pointer.Ptr(int64(0)), + }, + Resources: workloadmeta.ContainerResources{ + CPULimit: pointer.Ptr(0.5), + MemoryLimit: pointer.Ptr(uint64(104857600)), + }, + PID: 12345, + Hostname: "container-host", + CgroupPath: "/crio/crio-45e0df1c6e04fda693f5ef2654363c1ff5667bee7f8a9042ff5c629d48fbcbc4", + }, }, - }, nil + }, + expectedError: false, }, - } - - store := &fakeWorkloadmetaStore{} - crioCollector := collector{ - client: client, - store: store, - } - - err := crioCollector.Pull(context.Background()) - assert.NoError(t, err) - container := store.notifiedEvents[0].Entity.(*workloadmeta.Container) - - assert.Equal(t, "", container.EntityMeta.Namespace) // Namespace should be empty - assert.Equal(t, 12345, container.PID) - assert.Equal(t, "container-host", container.Hostname) - assert.Equal(t, "/crio/crio-45e0df1c6e04fda693f5ef2654363c1ff5667bee7f8a9042ff5c629d48fbcbc4", container.CgroupPath) -} - -// TestPullContainerImageError verifies error handling when retrieving container image fails. 
-func TestPullContainerImageError(t *testing.T) { - client := &fakeCRIOClient{ - mockGetAllContainers: func(_ context.Context) ([]*v1.Container, error) { - return []*v1.Container{ - {Id: "container1", PodSandboxId: "pod1"}, - }, nil - }, - mockGetContainerStatus: func(_ context.Context, _ string) (*v1.ContainerStatusResponse, error) { - return &v1.ContainerStatusResponse{ - Status: &v1.ContainerStatus{ - Metadata: &v1.ContainerMetadata{Name: "container1"}, - State: v1.ContainerState_CONTAINER_RUNNING, - CreatedAt: time.Now().Add(-10 * time.Minute).UnixNano(), - }, - Info: map[string]string{ - "info": `{ - "pid": 12345, - "runtimeSpec": { - "hostname": "container-host", - "linux": { - "cgroupsPath": "/crio/crio-45e0df1c6e04fda693f5ef2654363c1ff5667bee7f8a9042ff5c629d48fbcbc4" - } - } - }`, + { + name: "Container with missing metadata", + mockGetAllContainers: func(_ context.Context) ([]*v1.Container, error) { + return []*v1.Container{ + {Id: "container1", PodSandboxId: "pod1", Metadata: nil}, + }, nil + }, + mockGetContainerStatus: func(_ context.Context, _ string) (*v1.ContainerStatusResponse, error) { + return &v1.ContainerStatusResponse{ + Status: &v1.ContainerStatus{ + State: v1.ContainerState_CONTAINER_RUNNING, + }, + }, nil + }, + expectedEvents: []workloadmeta.CollectorEvent{ + { + Type: workloadmeta.EventTypeSet, + Source: workloadmeta.SourceRuntime, + Entity: &workloadmeta.Container{ + EntityID: workloadmeta.EntityID{Kind: workloadmeta.KindContainer, ID: "container1"}, + Runtime: workloadmeta.ContainerRuntimeCRIO, + State: workloadmeta.ContainerState{ + Running: true, + Status: workloadmeta.ContainerStatusRunning, + CreatedAt: time.Unix(0, 0).UTC(), + StartedAt: time.Unix(0, 0).UTC(), + FinishedAt: time.Unix(0, 0).UTC(), + ExitCode: pointer.Ptr(int64(0)), + }, + }, }, - }, nil - }, - mockGetContainerImage: func(_ context.Context, _ *v1.ImageSpec) (*v1.Image, error) { - return nil, errors.New("image retrieval error") + }, + expectedError: false, }, - } - - 
store := &fakeWorkloadmetaStore{} - crioCollector := collector{ - client: client, - store: store, - } - - err := crioCollector.Pull(context.Background()) - assert.NoError(t, err) - container := store.notifiedEvents[0].Entity.(*workloadmeta.Container) - - assert.Empty(t, container.Image.ID) - assert.Empty(t, container.Image.RawName) - assert.Equal(t, 12345, container.PID) - assert.Equal(t, "container-host", container.Hostname) - assert.Equal(t, "/crio/crio-45e0df1c6e04fda693f5ef2654363c1ff5667bee7f8a9042ff5c629d48fbcbc4", container.CgroupPath) -} - -// TestPullNoContainers verifies that Pull handles an empty container list gracefully. -func TestPullNoContainers(t *testing.T) { - client := &fakeCRIOClient{ - mockGetAllContainers: func(_ context.Context) ([]*v1.Container, error) { - return []*v1.Container{}, nil - }, - } - - store := &fakeWorkloadmetaStore{} - crioCollector := collector{ - client: client, - store: store, - } - - err := crioCollector.Pull(context.Background()) - assert.NoError(t, err) - assert.Empty(t, store.notifiedEvents) // Should have no events -} - -// TestPullContainerRetrievalError verifies that Pull handles an error when retrieving containers. -func TestPullContainerRetrievalError(t *testing.T) { - client := &fakeCRIOClient{ - mockGetAllContainers: func(_ context.Context) ([]*v1.Container, error) { - return nil, errors.New("failed to retrieve containers") - }, - } - - store := &fakeWorkloadmetaStore{} - crioCollector := collector{ - client: client, - store: store, - } - - err := crioCollector.Pull(context.Background()) - assert.Error(t, err) - assert.Empty(t, store.notifiedEvents) // No events should be generated -} - -// TestPullContainerMissingMetadata verifies that Pull handles containers with missing metadata. 
-func TestPullContainerMissingMetadata(t *testing.T) { - client := &fakeCRIOClient{ - mockGetAllContainers: func(_ context.Context) ([]*v1.Container, error) { - return []*v1.Container{ - {Id: "container1", PodSandboxId: "pod1", Metadata: nil}, // Missing metadata - }, nil - }, - mockGetContainerStatus: func(_ context.Context, _ string) (*v1.ContainerStatusResponse, error) { - return &v1.ContainerStatusResponse{ - Status: &v1.ContainerStatus{ - State: v1.ContainerState_CONTAINER_RUNNING, - }, - Info: map[string]string{ - "info": `{ - "pid": 12345, - "runtimeSpec": { - "hostname": "container-host", - "linux": { - "cgroupsPath": "/crio/crio-45e0df1c6e04fda693f5ef2654363c1ff5667bee7f8a9042ff5c629d48fbcbc4" - } - } - }`, + { + name: "Error retrieving container status", + mockGetAllContainers: func(_ context.Context) ([]*v1.Container, error) { + return []*v1.Container{ + {Id: "container1", PodSandboxId: "pod1"}, + }, nil + }, + mockGetContainerStatus: func(_ context.Context, _ string) (*v1.ContainerStatusResponse, error) { + return nil, errors.New("container status error") + }, + expectedEvents: []workloadmeta.CollectorEvent{ + { + Type: workloadmeta.EventTypeSet, + Source: workloadmeta.SourceRuntime, + Entity: &workloadmeta.Container{ + EntityID: workloadmeta.EntityID{Kind: workloadmeta.KindContainer, ID: "container1"}, + Runtime: workloadmeta.ContainerRuntimeCRIO, + State: workloadmeta.ContainerState{ + Status: workloadmeta.ContainerStatusUnknown, + }, + }, }, - }, nil + }, + expectedError: false, }, - } - - store := &fakeWorkloadmetaStore{} - crioCollector := collector{ - client: client, - store: store, - } - - err := crioCollector.Pull(context.Background()) - assert.NoError(t, err) - container := store.notifiedEvents[0].Entity.(*workloadmeta.Container) - - assert.Equal(t, "", container.EntityMeta.Name) // Default to unknown name - assert.Equal(t, 12345, container.PID) - assert.Equal(t, "container-host", container.Hostname) - assert.Equal(t, 
"/crio/crio-45e0df1c6e04fda693f5ef2654363c1ff5667bee7f8a9042ff5c629d48fbcbc4", container.CgroupPath) -} - -// TestPullContainerDefaultResourceLimits verifies that Pull handles containers with default resource limits. -func TestPullContainerDefaultResourceLimits(t *testing.T) { - client := &fakeCRIOClient{ - mockGetAllContainers: func(_ context.Context) ([]*v1.Container, error) { - return []*v1.Container{ - {Id: "container1", PodSandboxId: "pod1"}, - }, nil + { + name: "No containers returned", + mockGetAllContainers: func(_ context.Context) ([]*v1.Container, error) { + return []*v1.Container{}, nil + }, + expectedEvents: nil, + expectedError: false, }, - mockGetContainerStatus: func(_ context.Context, _ string) (*v1.ContainerStatusResponse, error) { - return &v1.ContainerStatusResponse{ - Status: &v1.ContainerStatus{ - Metadata: &v1.ContainerMetadata{Name: "container1"}, - Resources: &v1.ContainerResources{ - Linux: &v1.LinuxContainerResources{ - CpuQuota: 0, CpuPeriod: 0, MemoryLimitInBytes: 0, + { + name: "Error retrieving containers", + mockGetAllContainers: func(_ context.Context) ([]*v1.Container, error) { + return nil, errors.New("failed to retrieve containers") + }, + expectedEvents: nil, + expectedError: true, + }, + { + name: "All resource limits are zero", + mockGetAllContainers: func(_ context.Context) ([]*v1.Container, error) { + return []*v1.Container{ + {Id: "container1", Image: &v1.ImageSpec{Image: "myrepo/myimage:latest"}, PodSandboxId: "pod1", Metadata: &v1.ContainerMetadata{Name: "container1"}}, + }, nil + }, + mockGetPodStatus: func(_ context.Context, _ string) (*v1.PodSandboxStatus, error) { + return &v1.PodSandboxStatus{Metadata: &v1.PodSandboxMetadata{Namespace: "default"}}, nil + }, + mockGetContainerStatus: func(_ context.Context, _ string) (*v1.ContainerStatusResponse, error) { + return &v1.ContainerStatusResponse{ + Status: &v1.ContainerStatus{ + Metadata: &v1.ContainerMetadata{Name: "container1"}, + State: 
v1.ContainerState_CONTAINER_RUNNING, + CreatedAt: createTime, + StartedAt: startTime, + FinishedAt: finishTime, + Resources: &v1.ContainerResources{ + Linux: &v1.LinuxContainerResources{ + CpuQuota: 0, + CpuPeriod: 0, + MemoryLimitInBytes: 0, + }, + }, + }, + }, nil + }, + expectedEvents: []workloadmeta.CollectorEvent{ + { + Type: workloadmeta.EventTypeSet, + Source: workloadmeta.SourceRuntime, + Entity: &workloadmeta.Container{ + EntityID: workloadmeta.EntityID{Kind: workloadmeta.KindContainer, ID: "container1"}, + EntityMeta: workloadmeta.EntityMeta{ + Name: "container1", + Namespace: "default", + }, + Image: workloadmeta.ContainerImage{ + Name: "myrepo/myimage", + ShortName: "myimage", + RawName: "myrepo/myimage:latest", + ID: "sha256:123abc", + Tag: "latest", + RepoDigest: "myrepo/myimage@sha256:123abc", + }, + Resources: workloadmeta.ContainerResources{ + CPULimit: nil, // No CPU limit + MemoryLimit: nil, // No memory limit + }, + Runtime: workloadmeta.ContainerRuntimeCRIO, + State: workloadmeta.ContainerState{ + Status: workloadmeta.ContainerStatusRunning, + Running: true, + CreatedAt: time.Unix(0, createTime).UTC(), + StartedAt: time.Unix(0, startTime).UTC(), + FinishedAt: time.Unix(0, finishTime).UTC(), + ExitCode: pointer.Ptr(int64(0)), }, }, }, - Info: map[string]string{ - "info": `{ - "pid": 12345, - "runtimeSpec": { - "hostname": "container-host", - "linux": { - "cgroupsPath": "/crio/crio-45e0df1c6e04fda693f5ef2654363c1ff5667bee7f8a9042ff5c629d48fbcbc4" - } - } - }`, - }, - }, nil + }, + expectedError: false, + }, + { + name: "Error retrieving container", + mockGetAllContainers: func(_ context.Context) ([]*v1.Container, error) { + return nil, errors.New("failed to retrieve containers") + }, + expectedEvents: nil, + expectedError: true, }, } - store := &fakeWorkloadmetaStore{} - crioCollector := collector{ - client: client, - store: store, + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + client := &mockCRIOClient{ + 
mockGetAllContainers: tt.mockGetAllContainers, + mockGetPodStatus: tt.mockGetPodStatus, + mockGetContainerStatus: tt.mockGetContainerStatus, + mockGetContainerImage: tt.mockGetContainerImage, + } + + store := &mockWorkloadmetaStore{} + crioCollector := collector{ + client: client, + store: store, + } + + err := crioCollector.Pull(context.Background()) + + if tt.expectedError { + assert.Error(t, err) + } else { + assert.NoError(t, err) + } + + assert.Equal(t, tt.expectedEvents, store.notifiedEvents) + }) } - - err := crioCollector.Pull(context.Background()) - assert.NoError(t, err) - container := store.notifiedEvents[0].Entity.(*workloadmeta.Container) - - assert.Nil(t, container.Resources.CPULimit) - assert.Nil(t, container.Resources.MemoryLimit) - assert.Equal(t, 12345, container.PID) - assert.Equal(t, "container-host", container.Hostname) - assert.Equal(t, "/crio/crio-45e0df1c6e04fda693f5ef2654363c1ff5667bee7f8a9042ff5c629d48fbcbc4", container.CgroupPath) } -// TestPullContainerResourceFallbackToInfo verifies that Pull uses resource limits from info when Resources in containerStatus is nil. 
-func TestPullContainerResourceFallbackToInfo(t *testing.T) { - client := &fakeCRIOClient{ - mockGetAllContainers: func(_ context.Context) ([]*v1.Container, error) { - return []*v1.Container{ - {Id: "container1", PodSandboxId: "pod1"}, - }, nil - }, - mockGetContainerStatus: func(_ context.Context, _ string) (*v1.ContainerStatusResponse, error) { - return &v1.ContainerStatusResponse{ - Status: &v1.ContainerStatus{ - Metadata: &v1.ContainerMetadata{Name: "container1"}, - State: v1.ContainerState_CONTAINER_RUNNING, - CreatedAt: time.Now().Add(-10 * time.Minute).UnixNano(), - Resources: nil, // No resources in status - }, - Info: map[string]string{ - "info": `{ - "pid": 12345, - "runtimeSpec": { - "hostname": "container-host", - "linux": { - "cgroupsPath": "/crio/crio-45e0df1c6e04fda693f5ef2654363c1ff5667bee7f8a9042ff5c629d48fbcbc4", - "resources": { - "cpu": { - "quota": 50000, - "period": 100000 +func TestGenerateImageEventFromContainer(t *testing.T) { + time1, _ := time.Parse(time.RFC3339, "2023-01-01T00:00:00Z") + time2, _ := time.Parse(time.RFC3339, "2023-01-02T00:00:00Z") + tests := []struct { + name string + mockGetContainerImg func(context.Context, *v1.ImageSpec, bool) (*v1.ImageStatusResponse, error) + container *v1.Container + expectedEvent *workloadmeta.CollectorEvent + expectError bool + }{ + { + name: "Valid image metadata with history and layers", + mockGetContainerImg: func(_ context.Context, _ *v1.ImageSpec, _ bool) (*v1.ImageStatusResponse, error) { + return &v1.ImageStatusResponse{ + Image: &v1.Image{ + Id: "image123", + RepoTags: []string{"myrepo/myimage:latest"}, + RepoDigests: []string{"myrepo/myimage@sha256:123abc"}, + }, + Info: map[string]string{ + "info": `{ + "labels": {"label1": "value1", "label2": "value2"}, + "imageSpec": { + "os": "linux", + "architecture": "amd64", + "variant": "v8", + "rootfs": { + "diff_ids": ["sha256:layer1digest", "sha256:layer2digest"] + }, + "history": [ + { + "created": "2023-01-01T00:00:00Z", + "created_by": 
"command1", + "author": "author1", + "comment": "Layer 1 comment", + "empty_layer": false }, - "memory": { - "memoryLimitInBytes": 104857600 + { + "created": "2023-01-02T00:00:00Z", + "created_by": "command2", + "author": "author2", + "comment": "Layer 2 comment", + "empty_layer": false } - } + ] } - } - }`, + }`, + }, + }, nil + }, + container: &v1.Container{ + Id: "container1", + Image: &v1.ImageSpec{Image: "myrepo/myimage:latest"}, + PodSandboxId: "pod1", + }, + expectedEvent: &workloadmeta.CollectorEvent{ + Type: workloadmeta.EventTypeSet, + Source: workloadmeta.SourceRuntime, + Entity: &workloadmeta.ContainerImageMetadata{ + EntityID: workloadmeta.EntityID{Kind: workloadmeta.KindContainerImageMetadata, ID: "sha256:123abc"}, + EntityMeta: workloadmeta.EntityMeta{ + Name: "myrepo/myimage:latest", + Labels: map[string]string{"label1": "value1", "label2": "value2"}, + }, + RepoTags: []string{"myrepo/myimage:latest"}, + RepoDigests: []string{"myrepo/myimage@sha256:123abc"}, + OS: "linux", + Architecture: "amd64", + Variant: "v8", + Layers: []workloadmeta.ContainerImageLayer{ + { + Digest: "sha256:layer1digest", + History: &imgspecs.History{Created: &time1, CreatedBy: "command1", Author: "author1", Comment: "Layer 1 comment"}, + SizeBytes: 0, + }, + { + Digest: "sha256:layer2digest", + History: &imgspecs.History{Created: &time2, CreatedBy: "command2", Author: "author2", Comment: "Layer 2 comment"}, + SizeBytes: 0, + }, + }, + }, + }, + expectError: false, + }, + { + name: "Image has no repo tags or digest", + mockGetContainerImg: func(_ context.Context, _ *v1.ImageSpec, _ bool) (*v1.ImageStatusResponse, error) { + return &v1.ImageStatusResponse{ + Image: &v1.Image{ + Id: "image123", + }, + }, nil + }, + container: &v1.Container{ + Id: "container1", + Image: &v1.ImageSpec{Image: "repo/image:tag"}, + PodSandboxId: "pod1", + }, + expectedEvent: &workloadmeta.CollectorEvent{ + Type: workloadmeta.EventTypeSet, + Source: workloadmeta.SourceRuntime, + Entity: 
&workloadmeta.ContainerImageMetadata{ + EntityID: workloadmeta.EntityID{Kind: workloadmeta.KindContainerImageMetadata, ID: "image123"}, + EntityMeta: workloadmeta.EntityMeta{ + Name: "", + }, + RepoTags: nil, + RepoDigests: nil, }, - }, nil + }, + expectError: false, + }, + { + name: "Error retrieving image metadata", + mockGetContainerImg: func(_ context.Context, _ *v1.ImageSpec, _ bool) (*v1.ImageStatusResponse, error) { + return nil, fmt.Errorf("failed to retrieve image metadata") + }, + container: &v1.Container{ + Id: "container1", + Image: &v1.ImageSpec{Image: "repo/image:tag"}, + PodSandboxId: "pod1", + }, + expectedEvent: nil, + expectError: true, }, } - store := &fakeWorkloadmetaStore{} - crioCollector := collector{ - client: client, - store: store, + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + client := &mockCRIOClient{ + mockGetContainerImage: tt.mockGetContainerImg, + } + store := &mockWorkloadmetaStore{} + crioCollector := collector{ + client: client, + store: store, + } + + event, err := crioCollector.generateImageEventFromContainer(context.Background(), tt.container) + + if tt.expectError { + assert.Error(t, err) + } else { + assert.NoError(t, err) + assert.Equal(t, tt.expectedEvent, event) + } + }) } - - err := crioCollector.Pull(context.Background()) - assert.NoError(t, err) - assert.Len(t, store.notifiedEvents, 1) - container := store.notifiedEvents[0].Entity.(*workloadmeta.Container) - - assert.Equal(t, floatPtr(0.5), container.Resources.CPULimit) - assert.Equal(t, uintPtr(104857600), container.Resources.MemoryLimit) - assert.Equal(t, 12345, container.PID) - assert.Equal(t, "container-host", container.Hostname) - assert.Equal(t, "/crio/crio-45e0df1c6e04fda693f5ef2654363c1ff5667bee7f8a9042ff5c629d48fbcbc4", container.CgroupPath) } diff --git a/comp/core/workloadmeta/collectors/internal/crio/image.go b/comp/core/workloadmeta/collectors/internal/crio/image.go new file mode 100644 index 0000000000000..a5f25dc5abfb7 --- /dev/null 
+++ b/comp/core/workloadmeta/collectors/internal/crio/image.go @@ -0,0 +1,272 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +//go:build crio + +package crio + +import ( + "context" + "encoding/json" + "fmt" + "os" + "strings" + "time" + + imgspecs "github.com/opencontainers/image-spec/specs-go/v1" + v1 "k8s.io/cri-api/pkg/apis/runtime/v1" + + workloadmeta "github.com/DataDog/datadog-agent/comp/core/workloadmeta/def" + "github.com/DataDog/datadog-agent/pkg/util/crio" + "github.com/DataDog/datadog-agent/pkg/util/log" +) + +// generateImageEventFromContainer creates a workloadmeta image event based on container image metadata. +func (c *collector) generateImageEventFromContainer(ctx context.Context, container *v1.Container) (*workloadmeta.CollectorEvent, error) { + if container.GetImage() == nil || container.GetImage().GetImage() == "" { + return nil, fmt.Errorf("container has an invalid image reference: %+v", container) + } + imageSpec := v1.ImageSpec{Image: container.GetImage().GetImage()} + imageResp, err := c.client.GetContainerImage(ctx, &imageSpec, true) + if err != nil { + return nil, fmt.Errorf("failed to retrieve image data for container %+v: %w", container, err) + } + image := imageResp.GetImage() + + namespace := getPodNamespace(ctx, c.client, container.GetPodSandboxId()) + + imageEvent := c.convertImageToEvent(image, imageResp.GetInfo(), namespace) + return imageEvent, nil +} + +// convertImageToEvent converts a CRI-O image and additional metadata into a workloadmeta CollectorEvent. 
+func (c *collector) convertImageToEvent(img *v1.Image, info map[string]string, namespace string) *workloadmeta.CollectorEvent { + var annotations map[string]string + if img.GetSpec() == nil { + annotations = nil + } else { + annotations = img.GetSpec().GetAnnotations() + } + + var name string + if len(img.GetRepoTags()) > 0 { + name = img.GetRepoTags()[0] + } + imgID := img.GetId() + imgInfo := parseImageInfo(info, crio.GetOverlayImagePath(), imgID) + + imgIDAsDigest, err := parseDigests(img.GetRepoDigests()) + if err == nil { + imgID = imgIDAsDigest + } else if sbomCollectionIsEnabled() { + log.Warnf("Failed to parse digest for image with ID %s: %v. As a result, SBOM vulnerabilities may not be properly linked to this image.", imgID, err) + } + + imgMeta := workloadmeta.ContainerImageMetadata{ + EntityID: workloadmeta.EntityID{ + Kind: workloadmeta.KindContainerImageMetadata, + ID: imgID, + }, + EntityMeta: workloadmeta.EntityMeta{ + Name: name, + Namespace: namespace, + Annotations: annotations, + Labels: imgInfo.labels, + }, + SizeBytes: imgInfo.size, + RepoTags: img.GetRepoTags(), + RepoDigests: img.GetRepoDigests(), + OS: imgInfo.os, + Architecture: imgInfo.arch, + Variant: imgInfo.variant, + Layers: imgInfo.layers, + } + + return &workloadmeta.CollectorEvent{ + Type: workloadmeta.EventTypeSet, + Source: workloadmeta.SourceRuntime, + Entity: &imgMeta, + } +} + +// generateUnsetImageEvent generates an unset CollectorEvent for a removed or deleted image. +func generateUnsetImageEvent(seenID workloadmeta.EntityID) *workloadmeta.CollectorEvent { + return &workloadmeta.CollectorEvent{ + Type: workloadmeta.EventTypeUnset, + Source: workloadmeta.SourceRuntime, + Entity: &workloadmeta.ContainerImageMetadata{ + EntityID: seenID, + }, + } +} + +// parseDigests extracts the SHA from the image reference digest. +// The backend requires the image ID to be set as the SHA to correctly associate the SBOM with the image. 
+func parseDigests(imageRefs []string) (string, error) { + if len(imageRefs) == 0 { + return "", fmt.Errorf("empty digests list") + } + parts := strings.SplitN(imageRefs[0], "@", 2) + if len(parts) < 2 { + return "", fmt.Errorf("invalid format: no digest found in %s", imageRefs[0]) + } + + return parts[1], nil +} + +// parseImageInfo extracts operating system, architecture, variant, labels, and layer history from image info metadata. +func parseImageInfo(info map[string]string, layerFilePath string, imgID string) imageInfo { + var imgInfo imageInfo + + // Fetch additional layer information from the file + layerDetails, err := parseLayerInfo(layerFilePath, imgID) + if err != nil { + log.Debugf("Failed to get layer mediaType and size: %v", err) + } + + if imgSpec, ok := info["info"]; ok { + var parsed parsedInfo + + if err := json.Unmarshal([]byte(imgSpec), &parsed); err == nil { + imgInfo.os = parsed.ImageSpec.OS + imgInfo.arch = parsed.ImageSpec.Architecture + imgInfo.variant = parsed.ImageSpec.Variant + imgInfo.labels = parsed.Labels + + // Match layers with their history entries, including empty layers + historyIndex := 0 + for layerIndex, layerDigest := range parsed.ImageSpec.RootFS.DiffIDs { + // Append all empty layers encountered before this layer + for historyIndex < len(parsed.ImageSpec.History) { + history := parsed.ImageSpec.History[historyIndex] + if history.EmptyLayer { + created, _ := time.Parse(time.RFC3339, history.Created) + imgInfo.layers = append(imgInfo.layers, workloadmeta.ContainerImageLayer{ + History: &imgspecs.History{ + Created: &created, + CreatedBy: history.CreatedBy, + Author: history.Author, + Comment: history.Comment, + EmptyLayer: history.EmptyLayer, + }, + }) + historyIndex++ + } else { + // Stop at the first non-empty layer + break + } + } + + // Match the non-empty history to this layer + var historyEntry *imgspecs.History + if historyIndex < len(parsed.ImageSpec.History) { + h := parsed.ImageSpec.History[historyIndex] + created, _ 
:= time.Parse(time.RFC3339, h.Created) + historyEntry = &imgspecs.History{ + Created: &created, + CreatedBy: h.CreatedBy, + Author: h.Author, + Comment: h.Comment, + EmptyLayer: h.EmptyLayer, + } + historyIndex++ + } + + // Create and append the layer with the matched history + layer := workloadmeta.ContainerImageLayer{ + Digest: layerDigest, + History: historyEntry, + } + + // Add additional details from parsed layer info + if layerIndex < len(layerDetails) { + imgInfo.size += int64(layerDetails[layerIndex].Size) + layer.SizeBytes = int64(layerDetails[layerIndex].Size) + layer.MediaType = layerDetails[layerIndex].MediaType + } + + imgInfo.layers = append(imgInfo.layers, layer) + } + + // Append any remaining empty layers + for historyIndex < len(parsed.ImageSpec.History) { + history := parsed.ImageSpec.History[historyIndex] + if history.EmptyLayer { + created, _ := time.Parse(time.RFC3339, history.Created) + imgInfo.layers = append(imgInfo.layers, workloadmeta.ContainerImageLayer{ + History: &imgspecs.History{ + Created: &created, + CreatedBy: history.CreatedBy, + Author: history.Author, + Comment: history.Comment, + EmptyLayer: history.EmptyLayer, + }, + }) + } + historyIndex++ + } + } else { + log.Warnf("Failed to parse image info: %v", err) + } + } + + return imgInfo +} + +// parseLayerInfo reads a JSON file from the given path and returns a list of layerInfo +func parseLayerInfo(rootPath string, imgID string) ([]layerInfo, error) { + filePath := fmt.Sprintf("%s/%s/manifest", rootPath, imgID) + file, err := os.Open(filePath) + if err != nil { + return nil, fmt.Errorf("failed to open file: %w", err) + } + defer file.Close() + + var manifest struct { + Layers []layerInfo `json:"layers"` + } + + if err := json.NewDecoder(file).Decode(&manifest); err != nil { + return nil, fmt.Errorf("failed to decode JSON: %w", err) + } + + return manifest.Layers, nil +} + +// layerInfo holds the size and mediaType of each layer +type layerInfo struct { + Size int `json:"size"` + 
MediaType string `json:"mediaType"` +} + +// imageInfo holds the size, OS, architecture, variant, labels, and layers of an image. +type imageInfo struct { + size int64 + os string + arch string + variant string + labels map[string]string + layers []workloadmeta.ContainerImageLayer +} + +// parsedInfo holds layer metadata extracted from image JSON. +type parsedInfo struct { + Labels map[string]string `json:"labels"` + ImageSpec struct { + OS string `json:"os"` + Architecture string `json:"architecture"` + Variant string `json:"variant"` + RootFS struct { + DiffIDs []string `json:"diff_ids"` + } `json:"rootfs"` + History []struct { + Created string `json:"created"` + CreatedBy string `json:"created_by"` + Author string `json:"author"` + Comment string `json:"comment"` + EmptyLayer bool `json:"empty_layer"` + } `json:"history"` + } `json:"imageSpec"` +} diff --git a/pkg/security/module/noecs_tags.go b/comp/core/workloadmeta/collectors/internal/crio/image_sbom_stub.go similarity index 61% rename from pkg/security/module/noecs_tags.go rename to comp/core/workloadmeta/collectors/internal/crio/image_sbom_stub.go index 93e2a2467d4b1..3b7c67e101db1 100644 --- a/pkg/security/module/noecs_tags.go +++ b/comp/core/workloadmeta/collectors/internal/crio/image_sbom_stub.go @@ -3,11 +3,14 @@ // This product includes software developed at Datadog (https://www.datadoghq.com/). // Copyright 2016-present Datadog, Inc. 
-//go:build !docker +//go:build crio && !trivy -// Package module holds module related files -package module +package crio -func getCurrentECSTaskTags() (map[string]string, error) { - return nil, nil +import ( + "context" +) + +func (c *collector) startSBOMCollection(context.Context) error { + return nil } diff --git a/comp/core/workloadmeta/collectors/internal/crio/image_sbom_trivy.go b/comp/core/workloadmeta/collectors/internal/crio/image_sbom_trivy.go new file mode 100644 index 0000000000000..cdec9987ba615 --- /dev/null +++ b/comp/core/workloadmeta/collectors/internal/crio/image_sbom_trivy.go @@ -0,0 +1,186 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +//go:build crio && trivy + +package crio + +import ( + "context" + "fmt" + "os" + + "github.com/CycloneDX/cyclonedx-go" + + workloadmeta "github.com/DataDog/datadog-agent/comp/core/workloadmeta/def" + "github.com/DataDog/datadog-agent/pkg/sbom" + "github.com/DataDog/datadog-agent/pkg/sbom/collectors" + "github.com/DataDog/datadog-agent/pkg/sbom/collectors/crio" + "github.com/DataDog/datadog-agent/pkg/sbom/scanner" + crioutil "github.com/DataDog/datadog-agent/pkg/util/crio" + "github.com/DataDog/datadog-agent/pkg/util/log" +) + +// startSBOMCollection starts the SBOM collection process and subscribes to image metadata events. +func (c *collector) startSBOMCollection(ctx context.Context) error { + if !sbomCollectionIsEnabled() { + return nil + } + if err := overlayDirectoryAccess(); err != nil { + return fmt.Errorf("SBOM collection enabled, but error accessing overlay directories: %w", err) + } + c.sbomScanner = scanner.GetGlobalScanner() + if c.sbomScanner == nil { + return fmt.Errorf("global SBOM scanner not found") + } + + filter := workloadmeta.NewFilterBuilder(). + SetEventType(workloadmeta.EventTypeSet). 
+ AddKind(workloadmeta.KindContainerImageMetadata). + Build() + + imgEventsCh := c.store.Subscribe("SBOM collector", workloadmeta.NormalPriority, filter) + + scanner := collectors.GetCrioScanner() + if scanner == nil { + return fmt.Errorf("failed to retrieve CRI-O SBOM scanner") + } + + resultChan := scanner.Channel() + if resultChan == nil { + return fmt.Errorf("failed to retrieve scanner result channel") + } + + go c.handleImageEvents(ctx, imgEventsCh) + go c.startScanResultHandler(ctx, resultChan) + return nil +} + +// handleImageEvents listens for container image metadata events, triggering SBOM generation for new images. +func (c *collector) handleImageEvents(ctx context.Context, imgEventsCh <-chan workloadmeta.EventBundle) { + for { + select { + case <-ctx.Done(): + return + case eventBundle, ok := <-imgEventsCh: + if !ok { + log.Warnf("Event channel closed, exiting event handling loop.") + return + } + c.handleEventBundle(eventBundle) + } + } +} + +// handleEventBundle handles ContainerImageMetadata set events for which no SBOM generation attempt was done. +func (c *collector) handleEventBundle(eventBundle workloadmeta.EventBundle) { + eventBundle.Acknowledge() + for _, event := range eventBundle.Events { + image := event.Entity.(*workloadmeta.ContainerImageMetadata) + + if image.SBOM != nil && image.SBOM.Status != workloadmeta.Pending { + continue + } + if err := c.extractSBOMWithTrivy(image.ID); err != nil { + log.Warnf("Error extracting SBOM for image: namespace=%s name=%s, err: %s", image.Namespace, image.Name, err) + } + } +} + +// extractSBOMWithTrivy emits a scan request to the SBOM scanner. The scan result will be sent to the resultChan. 
+func (c *collector) extractSBOMWithTrivy(imageID string) error { + if err := c.sbomScanner.Scan(crio.NewScanRequest(imageID)); err != nil { + return fmt.Errorf("failed to trigger SBOM generation for CRI-O image ID %s: %w", imageID, err) + } + return nil +} + +// startScanResultHandler receives SBOM scan results and updates the workloadmeta entities accordingly. +func (c *collector) startScanResultHandler(ctx context.Context, resultChan <-chan sbom.ScanResult) { + for { + select { + case <-ctx.Done(): + return + case result, ok := <-resultChan: + if !ok { + return + } + c.processScanResult(result) + } + } +} + +// processScanResult updates the workloadmeta store with the SBOM for the image. +func (c *collector) processScanResult(result sbom.ScanResult) { + if result.ImgMeta == nil { + log.Errorf("Scan result missing image identifier. Error: %v", result.Error) + return + } + + c.notifyStoreWithSBOMForImage(result.ImgMeta.ID, convertScanResultToSBOM(result)) +} + +// convertScanResultToSBOM converts an SBOM scan result to a workloadmeta SBOM. +func convertScanResultToSBOM(result sbom.ScanResult) *workloadmeta.SBOM { + status := workloadmeta.Success + reportedError := "" + var report *cyclonedx.BOM + + if result.Error != nil { + log.Errorf("SBOM generation failed for image: %v", result.Error) + status = workloadmeta.Failed + reportedError = result.Error.Error() + } else if bom, err := result.Report.ToCycloneDX(); err != nil { + log.Errorf("Failed to convert report to CycloneDX BOM.") + status = workloadmeta.Failed + reportedError = err.Error() + } else { + report = bom + } + + return &workloadmeta.SBOM{ + CycloneDXBOM: report, + GenerationTime: result.CreatedAt, + GenerationDuration: result.Duration, + Status: status, + Error: reportedError, + } +} + +// notifyStoreWithSBOMForImage notifies the store about the SBOM for a given image. 
+func (c *collector) notifyStoreWithSBOMForImage(imageID string, sbom *workloadmeta.SBOM) { + c.store.Notify([]workloadmeta.CollectorEvent{ + { + Type: workloadmeta.EventTypeSet, + Source: workloadmeta.SourceTrivy, + Entity: &workloadmeta.ContainerImageMetadata{ + EntityID: workloadmeta.EntityID{ + Kind: workloadmeta.KindContainerImageMetadata, + ID: imageID, + }, + SBOM: sbom, + }, + }, + }) +} + +// overlayDirectoryAccess checks if the overlay directory and overlay-layers directory are accessible. +func overlayDirectoryAccess() error { + overlayPath := crioutil.GetOverlayPath() + if _, err := os.Stat(overlayPath); os.IsNotExist(err) { + return fmt.Errorf("overlay directory %s does not exist. Ensure this directory is mounted for SBOM collection to work", overlayPath) + } else if err != nil { + return fmt.Errorf("failed to check overlay directory %s: %w. Ensure this directory is mounted for SBOM collection to work", overlayPath, err) + } + + overlayLayersPath := crioutil.GetOverlayLayersPath() + if _, err := os.Stat(overlayLayersPath); os.IsNotExist(err) { + return fmt.Errorf("overlay-layers directory %s does not exist. Ensure this directory is mounted for SBOM collection to work", overlayLayersPath) + } else if err != nil { + return fmt.Errorf("failed to check overlay-layers directory %s: %w. Ensure this directory is mounted for SBOM collection to work", overlayLayersPath, err) + } + + return nil +} diff --git a/comp/core/workloadmeta/collectors/internal/crio/mock.go b/comp/core/workloadmeta/collectors/internal/crio/mock.go new file mode 100644 index 0000000000000..f48fb7d0e8d10 --- /dev/null +++ b/comp/core/workloadmeta/collectors/internal/crio/mock.go @@ -0,0 +1,92 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. 
+ +//go:build crio + +package crio + +import ( + "context" + "errors" + + v1 "k8s.io/cri-api/pkg/apis/runtime/v1" + + workloadmeta "github.com/DataDog/datadog-agent/comp/core/workloadmeta/def" +) + +// mockWorkloadmetaStore is a mock implementation of the workloadmeta store for testing purposes. +type mockWorkloadmetaStore struct { + workloadmeta.Component + notifiedEvents []workloadmeta.CollectorEvent +} + +// Notify appends events to the store's notifiedEvents, simulating notification behavior in tests. +func (store *mockWorkloadmetaStore) Notify(events []workloadmeta.CollectorEvent) { + store.notifiedEvents = append(store.notifiedEvents, events...) +} + +// mockCRIOClient simulates the CRI-O client, with configurable behavior through function hooks. +type mockCRIOClient struct { + mockGetAllContainers func(ctx context.Context) ([]*v1.Container, error) + mockGetContainerStatus func(ctx context.Context, containerID string) (*v1.ContainerStatusResponse, error) + mockGetPodStatus func(ctx context.Context, podID string) (*v1.PodSandboxStatus, error) + mockGetContainerImage func(ctx context.Context, imageSpec *v1.ImageSpec, verbose bool) (*v1.ImageStatusResponse, error) + mockRuntimeMetadata func(ctx context.Context) (*v1.VersionResponse, error) + mockGetCRIOImageLayers func(imgMeta *workloadmeta.ContainerImageMetadata) ([]string, error) +} + +// GetAllContainers returns a list of containers, or calls a mock function if defined. +func (f *mockCRIOClient) GetAllContainers(ctx context.Context) ([]*v1.Container, error) { + if f.mockGetAllContainers != nil { + return f.mockGetAllContainers(ctx) + } + return []*v1.Container{}, nil +} + +// GetContainerStatus retrieves the status of a container, or calls a mock function if defined. 
+func (f *mockCRIOClient) GetContainerStatus(ctx context.Context, containerID string) (*v1.ContainerStatusResponse, error) { + if f.mockGetContainerStatus != nil { + return f.mockGetContainerStatus(ctx, containerID) + } + return &v1.ContainerStatusResponse{}, nil +} + +// GetPodStatus retrieves the status of a pod, or calls a mock function if defined. +func (f *mockCRIOClient) GetPodStatus(ctx context.Context, podID string) (*v1.PodSandboxStatus, error) { + if f.mockGetPodStatus != nil { + return f.mockGetPodStatus(ctx, podID) + } + return &v1.PodSandboxStatus{}, nil +} + +// GetContainerImage retrieves image metadata, or calls a mock function if defined. +func (f *mockCRIOClient) GetContainerImage(ctx context.Context, imageSpec *v1.ImageSpec, verbose bool) (*v1.ImageStatusResponse, error) { + if f.mockGetContainerImage != nil { + return f.mockGetContainerImage(ctx, imageSpec, verbose) + } + return &v1.ImageStatusResponse{ + Image: &v1.Image{ + Id: "image123", + RepoTags: []string{"myrepo/myimage:latest"}, + RepoDigests: []string{"myrepo/myimage@sha256:123abc"}, + }, + }, nil +} + +// RuntimeMetadata retrieves the runtime metadata, or calls a mock function if defined. +func (f *mockCRIOClient) RuntimeMetadata(ctx context.Context) (*v1.VersionResponse, error) { + if f.mockRuntimeMetadata != nil { + return f.mockRuntimeMetadata(ctx) + } + return &v1.VersionResponse{RuntimeName: "cri-o", RuntimeVersion: "v1.30.0"}, nil +} + +// GetCRIOImageLayers retrieves the `diff` directories of each image layer, or calls a mock function if defined. 
+func (f *mockCRIOClient) GetCRIOImageLayers(_ *workloadmeta.ContainerImageMetadata) ([]string, error) { + if f.mockGetCRIOImageLayers != nil { + return f.mockGetCRIOImageLayers(nil) + } + return nil, errors.New("mock GetCRIOImageLayers function not defined") +} diff --git a/comp/core/workloadmeta/def/types.go b/comp/core/workloadmeta/def/types.go index da429ab5b0ff2..54bcfece491f7 100644 --- a/comp/core/workloadmeta/def/types.go +++ b/comp/core/workloadmeta/def/types.go @@ -64,6 +64,10 @@ const ( // use this source. SourceRuntime Source = "runtime" + // SourceTrivy represents entities detected by Trivy during the SBOM scan. + // `crio` uses this source. + SourceTrivy Source = "trivy" + // SourceNodeOrchestrator represents entities detected by the node // agent from an orchestrator. `kubelet` and `ecs` use this. SourceNodeOrchestrator Source = "node_orchestrator" @@ -1108,13 +1112,17 @@ func (i ContainerImageMetadata) String(verbose bool) string { _, _ = fmt.Fprintln(&sb, "Variant:", i.Variant) _, _ = fmt.Fprintln(&sb, "----------- SBOM -----------") - _, _ = fmt.Fprintln(&sb, "Status:", i.SBOM.Status) - switch i.SBOM.Status { - case Success: - _, _ = fmt.Fprintf(&sb, "Generated in: %.2f seconds\n", i.SBOM.GenerationDuration.Seconds()) - case Failed: - _, _ = fmt.Fprintf(&sb, "Error: %s\n", i.SBOM.Error) - default: + if i.SBOM != nil { + _, _ = fmt.Fprintln(&sb, "Status:", i.SBOM.Status) + switch i.SBOM.Status { + case Success: + _, _ = fmt.Fprintf(&sb, "Generated in: %.2f seconds\n", i.SBOM.GenerationDuration.Seconds()) + case Failed: + _, _ = fmt.Fprintf(&sb, "Error: %s\n", i.SBOM.Error) + default: + } + } else { + fmt.Fprintln(&sb, "SBOM is nil") } _, _ = fmt.Fprintln(&sb, "----------- Layers -----------") diff --git a/comp/haagent/def/component.go b/comp/haagent/def/component.go index 2472322d9a400..f1d3f53ce3fa5 100644 --- a/comp/haagent/def/component.go +++ b/comp/haagent/def/component.go @@ -22,4 +22,7 @@ type Component interface { // SetLeader takes the 
 leader agent hostname as input, if it matches the current agent hostname, // the isLeader state is set to true, otherwise false. SetLeader(leaderAgentHostname string) + + // ShouldRunIntegration returns true if the integration should be run + ShouldRunIntegration(integrationName string) bool } diff --git a/comp/haagent/impl/config.go b/comp/haagent/impl/config.go index 2417106455a7d..dc55f791264fc 100644 --- a/comp/haagent/impl/config.go +++ b/comp/haagent/impl/config.go @@ -9,6 +9,20 @@ import ( "github.com/DataDog/datadog-agent/comp/core/config" ) +// validHaIntegrations represents the list of integrations that will be considered as +// an "HA Integration", meaning it will only run on the leader Agent. +// At the moment, the list of HA Integrations is hardcoded here, but we might provide +// a more dynamic way to configure which integration should be considered HA Integration. +var validHaIntegrations = map[string]bool{ + // NDM integrations + "snmp": true, + "cisco_aci": true, + "cisco_sdwan": true, + + // Other integrations + "network_path": true, +} + type haAgentConfigs struct { enabled bool group string diff --git a/comp/haagent/impl/haagent.go b/comp/haagent/impl/haagent.go index 5328d039da2d3..050596256e16a 100644 --- a/comp/haagent/impl/haagent.go +++ b/comp/haagent/impl/haagent.go @@ -50,6 +50,15 @@ func (h *haAgentImpl) SetLeader(leaderAgentHostname string) { h.isLeader.Store(agentHostname == leaderAgentHostname) } +// ShouldRunIntegration returns true if the given agent integration should run. +// When ha-agent is disabled, the agent behaves as a standalone agent (non-HA) and will always run all integrations. 
+func (h *haAgentImpl) ShouldRunIntegration(integrationName string) bool { + if h.Enabled() && validHaIntegrations[integrationName] { + return h.isLeader.Load() + } + return true +} + func (h *haAgentImpl) onHaAgentUpdate(updates map[string]state.RawConfig, applyStateCallback func(string, state.ApplyStatus)) { h.log.Debugf("Updates received: count=%d", len(updates)) diff --git a/comp/haagent/impl/haagent_comp.go b/comp/haagent/impl/haagent_comp.go index f922325a9a440..19697bba7d464 100644 --- a/comp/haagent/impl/haagent_comp.go +++ b/comp/haagent/impl/haagent_comp.go @@ -28,8 +28,8 @@ type Provides struct { // NewComponent creates a new haagent component func NewComponent(reqs Requires) (Provides, error) { - haAgentConfigs := newHaAgentConfigs(reqs.AgentConfig) - haAgent := newHaAgentImpl(reqs.Logger, haAgentConfigs) + haAgentConf := newHaAgentConfigs(reqs.AgentConfig) + haAgent := newHaAgentImpl(reqs.Logger, haAgentConf) var rcListener rctypes.ListenerProvider if haAgent.Enabled() { reqs.Logger.Debug("Add onHaAgentUpdate RCListener") diff --git a/comp/haagent/impl/haagent_test.go b/comp/haagent/impl/haagent_test.go index b91e33f27c4b6..8821843e21c73 100644 --- a/comp/haagent/impl/haagent_test.go +++ b/comp/haagent/impl/haagent_test.go @@ -169,3 +169,80 @@ func Test_haAgentImpl_onHaAgentUpdate(t *testing.T) { }) } } + +func Test_haAgentImpl_ShouldRunIntegration(t *testing.T) { + testAgentHostname := "my-agent-hostname" + tests := []struct { + name string + leader string + agentConfigs map[string]interface{} + expectShouldRunIntegration map[string]bool + }{ + { + name: "ha agent enabled and agent is leader", + // should run HA-integrations + // should run "non HA integrations" + agentConfigs: map[string]interface{}{ + "hostname": testAgentHostname, + "ha_agent.enabled": true, + "ha_agent.group": testGroup, + }, + leader: testAgentHostname, + expectShouldRunIntegration: map[string]bool{ + "snmp": true, + "cisco_aci": true, + "cisco_sdwan": true, + "network_path": 
true, + "unknown_integration": true, + "cpu": true, + }, + }, + { + name: "ha agent enabled and agent is not leader", + // should skip HA-integrations + // should run "non HA integrations" + agentConfigs: map[string]interface{}{ + "hostname": testAgentHostname, + "ha_agent.enabled": true, + "ha_agent.group": testGroup, + }, + leader: "another-agent-is-leader", + expectShouldRunIntegration: map[string]bool{ + "snmp": false, + "cisco_aci": false, + "cisco_sdwan": false, + "network_path": false, + "unknown_integration": true, + "cpu": true, + }, + }, + { + name: "ha agent not enabled", + // should run all integrations + agentConfigs: map[string]interface{}{ + "hostname": testAgentHostname, + "ha_agent.enabled": false, + "ha_agent.group": testGroup, + }, + leader: testAgentHostname, + expectShouldRunIntegration: map[string]bool{ + "snmp": true, + "cisco_aci": true, + "cisco_sdwan": true, + "network_path": true, + "unknown_integration": true, + "cpu": true, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + haAgent := newTestHaAgentComponent(t, tt.agentConfigs) + haAgent.Comp.SetLeader(tt.leader) + + for integrationName, shouldRun := range tt.expectShouldRunIntegration { + assert.Equalf(t, shouldRun, haAgent.Comp.ShouldRunIntegration(integrationName), "fail for integration: "+integrationName) + } + }) + } +} diff --git a/comp/haagent/mock/mock.go b/comp/haagent/mock/mock.go index 52142737704c9..37c5cf4aa9916 100644 --- a/comp/haagent/mock/mock.go +++ b/comp/haagent/mock/mock.go @@ -44,6 +44,10 @@ func (m *mockHaAgent) SetEnabled(enabled bool) { m.enabled = enabled } +func (m *mockHaAgent) ShouldRunIntegration(_ string) bool { + return true +} + // Component is the component type. 
type Component interface { haagent.Component diff --git a/comp/metadata/inventoryagent/inventoryagentimpl/inventoryagent_test.go b/comp/metadata/inventoryagent/inventoryagentimpl/inventoryagent_test.go index 876dea30e8db8..281e87baba7e5 100644 --- a/comp/metadata/inventoryagent/inventoryagentimpl/inventoryagent_test.go +++ b/comp/metadata/inventoryagent/inventoryagentimpl/inventoryagent_test.go @@ -490,8 +490,8 @@ func TestFetchSystemProbeAgent(t *testing.T) { assert.False(t, ia.data["feature_usm_postgres_enabled"].(bool)) assert.False(t, ia.data["feature_usm_redis_enabled"].(bool)) assert.False(t, ia.data["feature_usm_http2_enabled"].(bool)) - assert.False(t, ia.data["feature_usm_istio_enabled"].(bool)) - assert.False(t, ia.data["feature_usm_go_tls_enabled"].(bool)) + assert.True(t, ia.data["feature_usm_istio_enabled"].(bool)) + assert.True(t, ia.data["feature_usm_go_tls_enabled"].(bool)) assert.False(t, ia.data["feature_discovery_enabled"].(bool)) assert.False(t, ia.data["feature_tcp_queue_length_enabled"].(bool)) assert.False(t, ia.data["feature_oom_kill_enabled"].(bool)) diff --git a/comp/otelcol/collector/impl-pipeline/pipeline.go b/comp/otelcol/collector/impl-pipeline/pipeline.go index 863d498006725..ab46ab875bcff 100644 --- a/comp/otelcol/collector/impl-pipeline/pipeline.go +++ b/comp/otelcol/collector/impl-pipeline/pipeline.go @@ -152,10 +152,12 @@ func NewComponent(reqs Requires) (Provides, error) { OnStart: collector.start, OnStop: collector.stop, }) - + timeoutCallback := func(flaretypes.FlareBuilder) time.Duration { + return time.Second * time.Duration(reqs.Config.GetInt("otelcollector.flare.timeout")) + } return Provides{ Comp: collector, - FlareProvider: flaretypes.NewProvider(collector.fillFlare), + FlareProvider: flaretypes.NewProviderWithTimeout(collector.fillFlare, timeoutCallback), StatusProvider: status.NewInformationProvider(collector), }, nil } diff --git a/comp/otelcol/otlp/components/processor/infraattributesprocessor/common.go 
b/comp/otelcol/otlp/components/processor/infraattributesprocessor/common.go index 44e2ff6be69a0..3ea280b2b2c56 100644 --- a/comp/otelcol/otlp/components/processor/infraattributesprocessor/common.go +++ b/comp/otelcol/otlp/components/processor/infraattributesprocessor/common.go @@ -7,11 +7,13 @@ package infraattributesprocessor import ( "fmt" - "go.uber.org/zap" "strings" + "go.uber.org/zap" + "go.opentelemetry.io/collector/pdata/pcommon" conventions "go.opentelemetry.io/collector/semconv/v1.21.0" + conventions22 "go.opentelemetry.io/collector/semconv/v1.22.0" "github.com/DataDog/datadog-agent/comp/core/tagger/tags" "github.com/DataDog/datadog-agent/comp/core/tagger/types" @@ -95,8 +97,8 @@ func entityIDsFromAttributes(attrs pcommon.Map, generateID GenerateKubeMetadataE if containerID, ok := attrs.Get(conventions.AttributeContainerID); ok { entityIDs = append(entityIDs, types.NewEntityID(types.ContainerID, containerID.AsString())) } - if containerImageID, ok := attrs.Get(conventions.AttributeContainerImageID); ok { - splitImageID := strings.SplitN(containerImageID.AsString(), "@sha256:", 2) + if ociManifestDigest, ok := attrs.Get(conventions22.AttributeOciManifestDigest); ok { + splitImageID := strings.SplitN(ociManifestDigest.AsString(), "@sha256:", 2) if len(splitImageID) == 2 { entityIDs = append(entityIDs, types.NewEntityID(types.ContainerImageMetadata, fmt.Sprintf("sha256:%v", splitImageID[1]))) } diff --git a/comp/otelcol/otlp/components/processor/infraattributesprocessor/metrics_test.go b/comp/otelcol/otlp/components/processor/infraattributesprocessor/metrics_test.go index 3117d8e74e1cf..fad3b1f3b4fc6 100644 --- a/comp/otelcol/otlp/components/processor/infraattributesprocessor/metrics_test.go +++ b/comp/otelcol/otlp/components/processor/infraattributesprocessor/metrics_test.go @@ -15,6 +15,7 @@ import ( "go.opentelemetry.io/collector/pdata/pmetric" "go.opentelemetry.io/collector/processor/processortest" conventions 
"go.opentelemetry.io/collector/semconv/v1.21.0" + conventions22 "go.opentelemetry.io/collector/semconv/v1.22.0" "github.com/DataDog/datadog-agent/comp/core/tagger/types" ) @@ -185,11 +186,11 @@ func TestEntityIDsFromAttributes(t *testing.T) { entityIDs: []string{"container_id://container_id_goes_here", "kubernetes_pod_uid://k8s_pod_uid_goes_here"}, }, { - name: "container image ID", + name: "image digest", attrs: func() pcommon.Map { attributes := pcommon.NewMap() attributes.FromRaw(map[string]interface{}{ - conventions.AttributeContainerImageID: "docker.io/foo@sha256:sha_goes_here", + conventions22.AttributeOciManifestDigest: "docker.io/foo@sha256:sha_goes_here", }) return attributes }(), diff --git a/comp/trace/config/setup.go b/comp/trace/config/setup.go index 52cca67431d5b..4cdb5f832eae2 100644 --- a/comp/trace/config/setup.go +++ b/comp/trace/config/setup.go @@ -194,7 +194,7 @@ func applyDatadogConfig(c *config.AgentConfig, core corecompcfg.Component) error c.SkipSSLValidation = core.GetBool("skip_ssl_validation") } if core.IsSet("apm_config.enabled") { - c.Enabled = core.GetBool("apm_config.enabled") + c.Enabled = utils.IsAPMEnabled(core) } if pkgconfigsetup.Datadog().IsSet("apm_config.log_file") { c.LogFilePath = pkgconfigsetup.Datadog().GetString("apm_config.log_file") diff --git a/docs/cloud-workload-security/agent_expressions.md b/docs/cloud-workload-security/agent_expressions.md index e051bb7d84ae3..386d12f3df6fc 100644 --- a/docs/cloud-workload-security/agent_expressions.md +++ b/docs/cloud-workload-security/agent_expressions.md @@ -40,7 +40,7 @@ SECL operators are used to combine event attributes together into a full express | `>=` | File | Greater or equal | 7.27 | | `<` | File | Lesser | 7.27 | | `<=` | File | Lesser or equal | 7.27 | -| `!` | File | Not | 7.27 | +| `!` or `not` | File | Not | 7.27 | | `^` | File | Binary not | 7.27 | | `in [elem1, ...]` | File | Element is contained in list | 7.27 | | `not in [elem1, ...]` | File | Element is not 
contained in list | 7.27 | @@ -48,8 +48,8 @@ SECL operators are used to combine event attributes together into a full express | `!~` | File | String not matching | 7.27 | | `&` | File | Binary and | 7.27 | | `\|` | File | Binary or | 7.27 | -| `&&` | File | Logical and | 7.27 | -| `\|\|` | File | Logical or | 7.27 | +| `&&` or `and` | File | Logical and | 7.27 | +| `\|\|` or `or` | File | Logical or | 7.27 | | `in CIDR` | Network | Element is in the IP range | 7.37 | | `not in CIDR` | Network | Element is not in the IP range | 7.37 | | `allin CIDR` | Network | All the elements are in the IP range | 7.37 | diff --git a/go.mod b/go.mod index e58ca2a03171c..a7a791af88629 100644 --- a/go.mod +++ b/go.mod @@ -742,7 +742,7 @@ require ( github.com/aws/aws-sdk-go-v2/service/secretsmanager v1.34.6 github.com/awslabs/amazon-ecr-credential-helper/ecr-login v0.0.0-20240409155312-26d1ea377073 github.com/cloudfoundry-community/go-cfclient/v2 v2.0.1-0.20230503155151-3d15366c5820 - github.com/containerd/cgroups/v3 v3.0.3 + github.com/containerd/cgroups/v3 v3.0.4 github.com/containerd/typeurl/v2 v2.2.3 github.com/dvsekhvalnov/jose2go v1.7.0 github.com/elastic/go-seccomp-bpf v1.5.0 diff --git a/go.sum b/go.sum index c7e1e4bd5af60..cb2bc7549b4d4 100644 --- a/go.sum +++ b/go.sum @@ -480,8 +480,8 @@ github.com/codahale/rfc6979 v0.0.0-20141003034818-6a90f24967eb h1:EDmT6Q9Zs+SbUo github.com/codahale/rfc6979 v0.0.0-20141003034818-6a90f24967eb/go.mod h1:ZjrT6AXHbDs86ZSdt/osfBi5qfexBrKUdONk989Wnk4= github.com/codegangsta/inject v0.0.0-20150114235600-33e0aa1cb7c0 h1:sDMmm+q/3+BukdIpxwO365v/Rbspp2Nt5XntgQRXq8Q= github.com/codegangsta/inject v0.0.0-20150114235600-33e0aa1cb7c0/go.mod h1:4Zcjuz89kmFXt9morQgcfYZAYZ5n8WHjt81YYWIwtTM= -github.com/containerd/cgroups/v3 v3.0.3 h1:S5ByHZ/h9PMe5IOQoN7E+nMc2UcLEM/V48DGDJ9kip0= -github.com/containerd/cgroups/v3 v3.0.3/go.mod h1:8HBe7V3aWGLFPd/k03swSIsGjZhHI2WzJmticMgVuz0= +github.com/containerd/cgroups/v3 v3.0.4 
h1:2fs7l3P0Qxb1nKWuJNFiwhp2CqiKzho71DQkDrHJIo4= +github.com/containerd/cgroups/v3 v3.0.4/go.mod h1:SA5DLYnXO8pTGYiAHXz94qvLQTKfVM5GEVisn4jpins= github.com/containerd/containerd v1.7.23 h1:H2CClyUkmpKAGlhQp95g2WXHfLYc7whAuvZGBNYOOwQ= github.com/containerd/containerd v1.7.23/go.mod h1:7QUzfURqZWCZV7RLNEn1XjUCQLEf0bkaK4GjUaZehxw= github.com/containerd/containerd/api v1.8.0 h1:hVTNJKR8fMc/2Tiw60ZRijntNMd1U+JVMyTRdsD2bS0= diff --git a/omnibus/config/projects/agent.rb b/omnibus/config/projects/agent.rb index e678c447b7e7e..bd24283687dbf 100644 --- a/omnibus/config/projects/agent.rb +++ b/omnibus/config/projects/agent.rb @@ -3,6 +3,7 @@ # This product includes software developed at Datadog (https:#www.datadoghq.com/). # Copyright 2016-present Datadog, Inc. require "./lib/ostools.rb" +require "./lib/project_helpers.rb" flavor = ENV['AGENT_FLAVOR'] output_config_dir = ENV["OUTPUT_CONFIG_DIR"] @@ -224,7 +225,7 @@ dependency 'datadog-agent' # System-probe - if linux_target? && !heroku_target? + if sysprobe_enabled? dependency 'system-probe' end @@ -236,6 +237,9 @@ if linux_target? dependency 'datadog-security-agent-policies' + if fips_mode? + dependency 'openssl-fips-provider' + end end # Include traps db file in snmp.d/traps_db/ @@ -325,10 +329,22 @@ GO_BINARIES << "#{install_dir}\\bin\\agent\\security-agent.exe" end + raise_if_fips_symbol_not_found = Proc.new { |symbols| + count = symbols.scan("github.com/microsoft/go-crypto-winnative").count() + if count == 0 + raise FIPSSymbolsNotFound.new("Expected to find symbol 'github.com/microsoft/go-crypto-winnative' but no symbol was found.") + end + } + GO_BINARIES.each do |bin| # Check the exported symbols from the binary inspect_binary(bin, &raise_if_forbidden_symbol_found) + if fips_mode? 
+ # Check that CNG symbols are present + inspect_binary(bin, &raise_if_fips_symbol_not_found) + end + # strip the binary of debug symbols windows_symbol_stripping_file bin end diff --git a/omnibus/config/software/datadog-agent-finalize.rb b/omnibus/config/software/datadog-agent-finalize.rb index f90a8843a2dfb..b797b6e8dcc55 100644 --- a/omnibus/config/software/datadog-agent-finalize.rb +++ b/omnibus/config/software/datadog-agent-finalize.rb @@ -91,15 +91,17 @@ move "#{install_dir}/etc/datadog-agent/datadog.yaml.example", "#{output_config_dir}/etc/datadog-agent" move "#{install_dir}/etc/datadog-agent/conf.d", "#{output_config_dir}/etc/datadog-agent", :force=>true unless heroku_target? - move "#{install_dir}/etc/datadog-agent/system-probe.yaml.example", "#{output_config_dir}/etc/datadog-agent" + if sysprobe_enabled? + move "#{install_dir}/etc/datadog-agent/system-probe.yaml.example", "#{output_config_dir}/etc/datadog-agent" + # SElinux policies aren't generated when system-probe isn't built + # Move SELinux policy + if debian_target? || redhat_target? + move "#{install_dir}/etc/datadog-agent/selinux", "#{output_config_dir}/etc/datadog-agent/selinux" + end + end move "#{install_dir}/etc/datadog-agent/security-agent.yaml.example", "#{output_config_dir}/etc/datadog-agent", :force=>true move "#{install_dir}/etc/datadog-agent/runtime-security.d", "#{output_config_dir}/etc/datadog-agent", :force=>true move "#{install_dir}/etc/datadog-agent/compliance.d", "#{output_config_dir}/etc/datadog-agent" - - # Move SELinux policy - if debian_target? || redhat_target? - move "#{install_dir}/etc/datadog-agent/selinux", "#{output_config_dir}/etc/datadog-agent/selinux" - end end if ot_target? diff --git a/omnibus/config/software/datadog-agent.rb b/omnibus/config/software/datadog-agent.rb index b84585631fd75..cabf4557c735c 100644 --- a/omnibus/config/software/datadog-agent.rb +++ b/omnibus/config/software/datadog-agent.rb @@ -4,6 +4,7 @@ # Copyright 2016-present Datadog, Inc. 
require './lib/ostools.rb' +require './lib/project_helpers.rb' require 'pathname' name 'datadog-agent' @@ -32,7 +33,9 @@ # set GOPATH on the omnibus source dir for this software gopath = Pathname.new(project_dir) + '../../../..' + msgoroot = "/usr/local/msgo" flavor_arg = ENV['AGENT_FLAVOR'] + fips_args = fips_mode? ? "--fips-mode" : "" if windows_target? env = { 'GOPATH' => gopath.to_path, @@ -57,9 +60,11 @@ # include embedded path (mostly for `pkg-config` binary) env = with_standard_compiler_flags(with_embedded_path(env)) - default_install_dir = "/opt/datadog-agent" - if Omnibus::Config.host_distribution == "ociru" - default_install_dir = "#{install_dir}" + + # Use msgo toolchain when fips mode is enabled + if fips_mode? && !windows_target? + env["GOROOT"] = msgoroot + env["PATH"] = "#{msgoroot}/bin:#{env['PATH']}" end # we assume the go deps are already installed before running omnibus @@ -83,10 +88,10 @@ if linux_target? include_sds = "--include-sds" # we only support SDS on Linux targets for now end - command "inv -e agent.build --exclude-rtloader #{include_sds} --major-version #{major_version_arg} --rebuild --no-development --install-path=#{install_dir} --embedded-path=#{default_install_dir}/embedded --python-home-2=#{default_install_dir}/embedded --python-home-3=#{default_install_dir}/embedded --flavor #{flavor_arg}", env: env + command "inv -e agent.build --exclude-rtloader #{include_sds} --major-version #{major_version_arg} --rebuild --no-development --install-path=#{install_dir} --embedded-path=#{install_dir}/embedded --flavor #{flavor_arg}", env: env if heroku_target? 
- command "inv -e agent.build --exclude-rtloader --major-version #{major_version_arg} --rebuild --no-development --install-path=#{install_dir} --embedded-path=#{install_dir}/embedded --python-home-2=#{install_dir}/embedded --python-home-3=#{install_dir}/embedded --flavor #{flavor_arg} --agent-bin=bin/agent/core-agent", env: env + command "inv -e agent.build --exclude-rtloader --major-version #{major_version_arg} --rebuild --no-development --install-path=#{install_dir} --embedded-path=#{install_dir}/embedded --flavor #{flavor_arg} --agent-bin=bin/agent/core-agent", env: env end end @@ -134,12 +139,11 @@ end # System-probe - sysprobe_support = (not heroku_target?) && (linux_target? || (windows_target? && do_windows_sysprobe != "")) - if sysprobe_support + if sysprobe_enabled? || (windows_target? && do_windows_sysprobe != "") if windows_target? command "invoke -e system-probe.build", env: env elsif linux_target? - command "invoke -e system-probe.build-sysprobe-binary --install-path=#{install_dir}", env: env + command "invoke -e system-probe.build-sysprobe-binary #{fips_args} --install-path=#{install_dir}", env: env end if windows_target? @@ -160,7 +164,7 @@ # Security agent secagent_support = (not heroku_target?) and (not windows_target? or (ENV['WINDOWS_DDPROCMON_DRIVER'] and not ENV['WINDOWS_DDPROCMON_DRIVER'].empty?)) if secagent_support - command "invoke -e security-agent.build --install-path=#{install_dir} --major-version #{major_version_arg}", :env => env + command "invoke -e security-agent.build #{fips_args} --install-path=#{install_dir} --major-version #{major_version_arg}", :env => env if windows_target? copy 'bin/security-agent/security-agent.exe', "#{install_dir}/bin/agent" else @@ -224,6 +228,37 @@ delete "#{install_dir}/uselessfile" end + # TODO: move this to omnibus-ruby::health-check.rb + # check that linux binaries contains OpenSSL symbols when building to support FIPS + if fips_mode? && linux_target? 
+ # Put the ruby code in a block to prevent omnibus from running it directly but rather at build step with the rest of the code above. + # If not in a block, it will search for binaries that have not been built yet. + block do + LINUX_BINARIES = [ + "#{install_dir}/bin/agent/agent", + "#{install_dir}/embedded/bin/trace-agent", + "#{install_dir}/embedded/bin/process-agent", + "#{install_dir}/embedded/bin/security-agent", + "#{install_dir}/embedded/bin/system-probe", + ] + + symbol = "_Cfunc_go_openssl" + check_block = Proc.new { |binary, symbols| + count = symbols.scan(symbol).count + if count > 0 + log.info(log_key) { "Symbol '#{symbol}' found #{count} times in binary '#{binary}'." } + else + raise FIPSSymbolsNotFound.new("Expected to find '#{symbol}' symbol in #{binary} but did not") + end + }.curry + + LINUX_BINARIES.each do |bin| + partially_applied_check = check_block.call(bin) + GoSymbolsInspector.new(bin, &partially_applied_check).inspect() + end + end + end + python_scripts_dir = "#{project_dir}/omnibus/python-scripts" mkdir "#{install_dir}/python-scripts" copy "#{python_scripts_dir}/*", "#{install_dir}/python-scripts" diff --git a/omnibus/config/software/openssl-fips-provider.rb b/omnibus/config/software/openssl-fips-provider.rb new file mode 100644 index 0000000000000..776d7e81531b3 --- /dev/null +++ b/omnibus/config/software/openssl-fips-provider.rb @@ -0,0 +1,47 @@ +# Unless explicitly stated otherwise all files in this repository are licensed +# under the Apache License Version 2.0. +# This product includes software developed at Datadog (https://www.datadoghq.com). +# Copyright 2016-present Datadog, Inc. + +# Embedded OpenSSL to meet FIPS requirements. It comes in two parts: +# 1. The FIPS module itself (this software definition). It must use a FIPS-validated version +# and follow the build steps outlined in the OpenSSL FIPS Security Policy. +# 2. The OpenSSL library, which can be any 3.0.x version. This library will use the FIPS provider. 
+ +name "openssl-fips-provider" +default_version "0.0.1" + +OPENSSL_FIPS_MODULE_VERSION="3.0.9" +OPENSSL_FIPS_MODULE_FILENAME="openssl-#{OPENSSL_FIPS_MODULE_VERSION}.tar.gz" +OPENSSL_FIPS_MODULE_SHA256_SUM="eb1ab04781474360f77c318ab89d8c5a03abc38e63d65a603cabbf1b00a1dc90" + +source url: "https://www.openssl.org/source/#{OPENSSL_FIPS_MODULE_FILENAME}", + sha256: "#{OPENSSL_FIPS_MODULE_SHA256_SUM}", + extract: :seven_zip + +relative_path "openssl-#{OPENSSL_FIPS_MODULE_VERSION}" + +build do + # Exact build steps from security policy: + # https://csrc.nist.gov/CSRC/media/projects/cryptographic-module-validation-program/documents/security-policies/140sp4282.pdf + # + # ---------------- DO NOT MODIFY LINES BELOW HERE ---------------- + command "./Configure enable-fips" + + command "make" + command "make install" + # ---------------- DO NOT MODIFY LINES ABOVE HERE ---------------- + + mkdir "#{install_dir}/embedded/ssl" + mkdir "#{install_dir}/embedded/lib/ossl-modules" + copy "/usr/local/lib*/ossl-modules/fips.so", "#{install_dir}/embedded/lib/ossl-modules/fips.so" + + erb source: "openssl.cnf.erb", + dest: "#{install_dir}/embedded/ssl/openssl.cnf.tmp", + mode: 0644, + vars: { install_dir: install_dir } + erb source: "fipsinstall.sh.erb", + dest: "#{install_dir}/embedded/bin/fipsinstall.sh", + mode: 0755, + vars: { install_dir: install_dir } +end diff --git a/omnibus/config/software/system-probe.rb b/omnibus/config/software/system-probe.rb index 19fdf7f0b9dbb..8c889601879d8 100644 --- a/omnibus/config/software/system-probe.rb +++ b/omnibus/config/software/system-probe.rb @@ -21,19 +21,17 @@ mkdir "#{install_dir}/embedded/share/system-probe/ebpf/co-re/btf" mkdir "#{install_dir}/embedded/share/system-probe/java" - if ENV.has_key?('SYSTEM_PROBE_BIN') and not ENV['SYSTEM_PROBE_BIN'].empty? 
- arch = `uname -m`.strip - if arch == "aarch64" - arch = "arm64" - end - copy "pkg/ebpf/bytecode/build/#{arch}/*.o", "#{install_dir}/embedded/share/system-probe/ebpf/" - delete "#{install_dir}/embedded/share/system-probe/ebpf/usm_events_test*.o" - copy "pkg/ebpf/bytecode/build/#{arch}/co-re/*.o", "#{install_dir}/embedded/share/system-probe/ebpf/co-re/" - copy "pkg/ebpf/bytecode/build/runtime/*.c", "#{install_dir}/embedded/share/system-probe/ebpf/runtime/" - copy "#{ENV['SYSTEM_PROBE_BIN']}/clang-bpf", "#{install_dir}/embedded/bin/clang-bpf" - copy "#{ENV['SYSTEM_PROBE_BIN']}/llc-bpf", "#{install_dir}/embedded/bin/llc-bpf" - copy "#{ENV['SYSTEM_PROBE_BIN']}/minimized-btfs.tar.xz", "#{install_dir}/embedded/share/system-probe/ebpf/co-re/btf/minimized-btfs.tar.xz" + arch = `uname -m`.strip + if arch == "aarch64" + arch = "arm64" end + copy "pkg/ebpf/bytecode/build/#{arch}/*.o", "#{install_dir}/embedded/share/system-probe/ebpf/" + delete "#{install_dir}/embedded/share/system-probe/ebpf/usm_events_test*.o" + copy "pkg/ebpf/bytecode/build/#{arch}/co-re/*.o", "#{install_dir}/embedded/share/system-probe/ebpf/co-re/" + copy "pkg/ebpf/bytecode/build/runtime/*.c", "#{install_dir}/embedded/share/system-probe/ebpf/runtime/" + copy "#{ENV['SYSTEM_PROBE_BIN']}/clang-bpf", "#{install_dir}/embedded/bin/clang-bpf" + copy "#{ENV['SYSTEM_PROBE_BIN']}/llc-bpf", "#{install_dir}/embedded/bin/llc-bpf" + copy "#{ENV['SYSTEM_PROBE_BIN']}/minimized-btfs.tar.xz", "#{install_dir}/embedded/share/system-probe/ebpf/co-re/btf/minimized-btfs.tar.xz" copy 'pkg/ebpf/c/COPYING', "#{install_dir}/embedded/share/system-probe/ebpf/" end diff --git a/omnibus/config/templates/openssl-fips-provider/fipsinstall.sh.erb b/omnibus/config/templates/openssl-fips-provider/fipsinstall.sh.erb new file mode 100644 index 0000000000000..a789db7c20df4 --- /dev/null +++ b/omnibus/config/templates/openssl-fips-provider/fipsinstall.sh.erb @@ -0,0 +1,29 @@ +#!/bin/bash + +# The OpenSSL security policy states: +# "The Module 
shall have the self-tests run, and the Module config file output generated on each +# platform where it is intended to be used. The Module config file output data shall not be copied from +# one machine to another." +# This script aims to run self-tests and generate `fipsmodule.cnf.` +# Because the provided `openssl.cnf` references to `fipsmodule.cnf` which is not yet created, we first create it +# as `openssl.cnf.tmp` and then move it to its final name `openssl.cnf` when `fipsmodule.cnf` has been created + +set -euo pipefail + +INSTALL_DIR="<%= install_dir %>/embedded" + +FIPS_MODULE_PATH="${INSTALL_DIR}/ssl/fipsmodule.cnf" +OPENSSL_CONF_PATH="${INSTALL_DIR}/ssl/openssl.cnf" + +FIPS_SO_PATH="${INSTALL_DIR}/lib/ossl-modules/fips.so" +OPENSSL_BIN="${INSTALL_DIR}/bin/openssl" + + +if [ ! -f "${FIPS_MODULE_PATH}" ]; then + "${OPENSSL_BIN}" fipsinstall -module "${FIPS_SO_PATH}" -out "${FIPS_MODULE_PATH}" + mv "${OPENSSL_CONF_PATH}.tmp" "${OPENSSL_CONF_PATH}" +fi + +if ! "${OPENSSL_BIN}" fipsinstall -module "${FIPS_SO_PATH}" -in "${FIPS_MODULE_PATH}" -verify; then + echo "openssl fipsinstall: verification of FIPS compliance failed. $INSTALL_DIR/fipsmodule.cnf was corrupted or the installation failed." 
+fi diff --git a/omnibus/config/templates/openssl-fips-provider/openssl.cnf.erb b/omnibus/config/templates/openssl-fips-provider/openssl.cnf.erb new file mode 100644 index 0000000000000..1304d1021b08d --- /dev/null +++ b/omnibus/config/templates/openssl-fips-provider/openssl.cnf.erb @@ -0,0 +1,14 @@ +config_diagnostics = 1 +openssl_conf = openssl_init + +.include <%= install_dir %>/embedded/ssl/fipsmodule.cnf + +[openssl_init] +providers = provider_sect + +[provider_sect] +fips = fips_sect +base = base_sect + +[base_sect] +activate = 1 diff --git a/omnibus/lib/ostools.rb b/omnibus/lib/ostools.rb index 13d8d26e06e67..6ec8612585c37 100644 --- a/omnibus/lib/ostools.rb +++ b/omnibus/lib/ostools.rb @@ -63,3 +63,6 @@ def os end end +def fips_mode?() + return ENV['AGENT_FLAVOR'] == "fips" && (linux_target? || windows_target?) +end \ No newline at end of file diff --git a/omnibus/lib/project_helpers.rb b/omnibus/lib/project_helpers.rb new file mode 100644 index 0000000000000..d1498342ec617 --- /dev/null +++ b/omnibus/lib/project_helpers.rb @@ -0,0 +1,12 @@ +# +# Project related helpers +# + +require './lib/ostools.rb' + +def sysprobe_enabled?() + # This doesn't account for Windows special case which build system probe as part of the + # agent build process + !heroku_target? && linux_target? && !ENV.fetch('SYSTEM_PROBE_BIN', '').empty? 
+end + diff --git a/omnibus/lib/symbols_inspectors.rb b/omnibus/lib/symbols_inspectors.rb index 31a56c0171a9d..797f239c0dc12 100644 --- a/omnibus/lib/symbols_inspectors.rb +++ b/omnibus/lib/symbols_inspectors.rb @@ -5,6 +5,10 @@ class ForbiddenSymbolsFoundError < StandardError end +class FIPSSymbolsNotFound < StandardError + +end + # Helper class to locate `dumpbin.exe` on Windows class Dumpbin include Singleton @@ -76,4 +80,4 @@ def inspect() log.info(self.class.name) { "Inspecting binary #{@binary}" } @block.call(`go tool nm #{@binary}`) end -end \ No newline at end of file +end diff --git a/omnibus/package-scripts/agent-deb/postinst b/omnibus/package-scripts/agent-deb/postinst index f0d2f12e78ad1..09a1c4a0d6d41 100755 --- a/omnibus/package-scripts/agent-deb/postinst +++ b/omnibus/package-scripts/agent-deb/postinst @@ -65,7 +65,11 @@ install_method: echo "$install_info_content" > $CONFIG_DIR/install_info fi -set +e +if [ -x ${INSTALL_DIR}/embedded/bin/fipsinstall.sh ]; then + ${INSTALL_DIR}/embedded/bin/fipsinstall.sh +fi + +set +e generate_install_id() { # Try generating a UUID based on /proc/sys/kernel/random/uuid diff --git a/omnibus/package-scripts/agent-deb/prerm b/omnibus/package-scripts/agent-deb/prerm index 19c8714ec2cb9..67c916c335da5 100755 --- a/omnibus/package-scripts/agent-deb/prerm +++ b/omnibus/package-scripts/agent-deb/prerm @@ -167,6 +167,13 @@ remove_persist_integration_files() fi } +remove_fips_module() +{ + # We explicitly remove the ssl directory because files within this folder are generated via a script + # outside of package installation (deb package only removes files initially present in the package). + rm -rf "${INSTALL_DIR}/embedded/ssl/fipsmodule.cnf" || true +} + case "$1" in #this can't be merged with the later case block because running 'remove_custom_integrations' would defeat the persisting integrations feature upgrade) # We're upgrading. 
@@ -189,6 +196,7 @@ case "$1" in remove_version_history remove_sysprobe_secagent_files remove_remote_config_db + remove_fips_module remove_persist_integration_files ;; upgrade) diff --git a/omnibus/package-scripts/agent-rpm/posttrans b/omnibus/package-scripts/agent-rpm/posttrans index a560a4626b45d..7206123b61636 100755 --- a/omnibus/package-scripts/agent-rpm/posttrans +++ b/omnibus/package-scripts/agent-rpm/posttrans @@ -110,6 +110,11 @@ install_method: echo "$install_info_content" > $CONFIG_DIR/install_info fi +if [ -x ${INSTALL_DIR}/embedded/bin/fipsinstall.sh ]; then + ${INSTALL_DIR}/embedded/bin/fipsinstall.sh +fi + + set +e generate_install_id() { diff --git a/omnibus/package-scripts/agent-rpm/prerm b/omnibus/package-scripts/agent-rpm/prerm index 9568f9b55f5ee..df2a03b266047 100755 --- a/omnibus/package-scripts/agent-rpm/prerm +++ b/omnibus/package-scripts/agent-rpm/prerm @@ -151,6 +151,13 @@ remove_persist_integration_files() fi } +remove_fips_module() +{ + # We explicitly remove the ssl directory because files within this folder are generated via a script + # outside of package installation (rpm package only removes files initially present in the package). + rm -rf "${INSTALL_DIR}/embedded/ssl/fipsmodule.cnf" || true +} + case "$*" in 1) # We're upgrading. 
@@ -173,6 +180,7 @@ case "$*" in remove_version_history remove_sysprobe_secagent_files remove_remote_config_db + remove_fips_module remove_persist_integration_files ;; 1) diff --git a/pkg/clusteragent/clusterchecks/dispatcher_isolate_test.go b/pkg/clusteragent/clusterchecks/dispatcher_isolate_test.go index 609d4264fa968..bd2cfbb276f83 100644 --- a/pkg/clusteragent/clusterchecks/dispatcher_isolate_test.go +++ b/pkg/clusteragent/clusterchecks/dispatcher_isolate_test.go @@ -13,13 +13,15 @@ import ( "github.com/stretchr/testify/assert" "github.com/DataDog/datadog-agent/comp/core/autodiscovery/integration" + "github.com/DataDog/datadog-agent/comp/core/tagger/mock" "github.com/DataDog/datadog-agent/pkg/clusteragent/clusterchecks/types" checkid "github.com/DataDog/datadog-agent/pkg/collector/check/id" pkgconfigsetup "github.com/DataDog/datadog-agent/pkg/config/setup" ) func TestIsolateCheckSuccessful(t *testing.T) { - testDispatcher := newDispatcher() + fakeTagger := mock.SetupFakeTagger(t) + testDispatcher := newDispatcher(fakeTagger) testDispatcher.store.nodes["A"] = newNodeStore("A", "") testDispatcher.store.nodes["A"].workers = pkgconfigsetup.DefaultNumWorkers testDispatcher.store.nodes["B"] = newNodeStore("B", "") @@ -99,7 +101,8 @@ func TestIsolateCheckSuccessful(t *testing.T) { } func TestIsolateNonExistentCheckFails(t *testing.T) { - testDispatcher := newDispatcher() + fakeTagger := mock.SetupFakeTagger(t) + testDispatcher := newDispatcher(fakeTagger) testDispatcher.store.nodes["A"] = newNodeStore("A", "") testDispatcher.store.nodes["A"].workers = pkgconfigsetup.DefaultNumWorkers testDispatcher.store.nodes["B"] = newNodeStore("B", "") @@ -177,7 +180,8 @@ func TestIsolateNonExistentCheckFails(t *testing.T) { } func TestIsolateCheckOnlyOneRunnerFails(t *testing.T) { - testDispatcher := newDispatcher() + fakeTagger := mock.SetupFakeTagger(t) + testDispatcher := newDispatcher(fakeTagger) testDispatcher.store.nodes["A"] = newNodeStore("A", "") 
testDispatcher.store.nodes["A"].workers = pkgconfigsetup.DefaultNumWorkers diff --git a/pkg/clusteragent/clusterchecks/dispatcher_main.go b/pkg/clusteragent/clusterchecks/dispatcher_main.go index 398bf21d2c5c5..6f1a894f4e354 100644 --- a/pkg/clusteragent/clusterchecks/dispatcher_main.go +++ b/pkg/clusteragent/clusterchecks/dispatcher_main.go @@ -13,6 +13,8 @@ import ( "time" "github.com/DataDog/datadog-agent/comp/core/autodiscovery/integration" + tagger "github.com/DataDog/datadog-agent/comp/core/tagger/def" + "github.com/DataDog/datadog-agent/comp/core/tagger/types" pkgconfigsetup "github.com/DataDog/datadog-agent/pkg/config/setup" "github.com/DataDog/datadog-agent/pkg/status/health" "github.com/DataDog/datadog-agent/pkg/util/clusteragent" @@ -34,12 +36,21 @@ type dispatcher struct { rebalancingPeriod time.Duration } -func newDispatcher() *dispatcher { +func newDispatcher(tagger tagger.Component) *dispatcher { d := &dispatcher{ store: newClusterStore(), } d.nodeExpirationSeconds = pkgconfigsetup.Datadog().GetInt64("cluster_checks.node_expiration_timeout") - d.extraTags = pkgconfigsetup.Datadog().GetStringSlice("cluster_checks.extra_tags") + + // Attach the cluster agent's global tags to all dispatched checks + // as defined in the tagger's workloadmeta collector + var err error + d.extraTags, err = tagger.GlobalTags(types.LowCardinality) + if err != nil { + log.Warnf("Cannot get global tags from the tagger: %v", err) + } else { + log.Debugf("Adding global tags to cluster check dispatcher: %v", d.extraTags) + } excludedChecks := pkgconfigsetup.Datadog().GetStringSlice("cluster_checks.exclude_checks") // This option will almost always be empty @@ -77,7 +88,6 @@ func newDispatcher() *dispatcher { return d } - var err error d.clcRunnersClient, err = clusteragent.GetCLCRunnerClient() if err != nil { log.Warnf("Cannot create CLC runners client, advanced dispatching will be disabled: %v", err) diff --git a/pkg/clusteragent/clusterchecks/dispatcher_rebalance_test.go 
b/pkg/clusteragent/clusterchecks/dispatcher_rebalance_test.go index 09747e5c82abf..2c073bab0670b 100644 --- a/pkg/clusteragent/clusterchecks/dispatcher_rebalance_test.go +++ b/pkg/clusteragent/clusterchecks/dispatcher_rebalance_test.go @@ -15,6 +15,7 @@ import ( "github.com/stretchr/testify/require" "github.com/DataDog/datadog-agent/comp/core/autodiscovery/integration" + "github.com/DataDog/datadog-agent/comp/core/tagger/mock" "github.com/DataDog/datadog-agent/pkg/clusteragent/clusterchecks/types" checkid "github.com/DataDog/datadog-agent/pkg/collector/check/id" pkgconfigsetup "github.com/DataDog/datadog-agent/pkg/config/setup" @@ -1377,7 +1378,8 @@ func TestRebalance(t *testing.T) { checkMetricSamplesWeight = originalMetricSamplesWeight }() - dispatcher := newDispatcher() + fakeTagger := mock.SetupFakeTagger(t) + dispatcher := newDispatcher(fakeTagger) // prepare store dispatcher.store.active = true @@ -1433,7 +1435,8 @@ func TestMoveCheck(t *testing.T) { }, } { t.Run(fmt.Sprintf("case %d", i), func(t *testing.T) { - dispatcher := newDispatcher() + fakeTagger := mock.SetupFakeTagger(t) + dispatcher := newDispatcher(fakeTagger) // setup check id id := checkid.BuildID(tc.check.config.Name, tc.check.config.FastDigest(), tc.check.config.Instances[0], tc.check.config.InitConfig) @@ -1477,7 +1480,8 @@ func TestCalculateAvg(t *testing.T) { checkMetricSamplesWeight = originalMetricSamplesWeight }() - testDispatcher := newDispatcher() + fakeTagger := mock.SetupFakeTagger(t) + testDispatcher := newDispatcher(fakeTagger) // The busyness of this node is 3 (1 + 2) testDispatcher.store.nodes["node1"] = newNodeStore("node1", "") @@ -1518,7 +1522,8 @@ func TestRebalanceUsingUtilization(t *testing.T) { // other tests specific for the checksDistribution struct that test more // complex scenarios. 
- testDispatcher := newDispatcher() + fakeTagger := mock.SetupFakeTagger(t) + testDispatcher := newDispatcher(fakeTagger) testDispatcher.store.active = true testDispatcher.store.nodes["node1"] = newNodeStore("node1", "") diff --git a/pkg/clusteragent/clusterchecks/dispatcher_test.go b/pkg/clusteragent/clusterchecks/dispatcher_test.go index f2c31184ff8b7..ff348a025ed58 100644 --- a/pkg/clusteragent/clusterchecks/dispatcher_test.go +++ b/pkg/clusteragent/clusterchecks/dispatcher_test.go @@ -16,6 +16,7 @@ import ( "gopkg.in/yaml.v2" "github.com/DataDog/datadog-agent/comp/core/autodiscovery/integration" + "github.com/DataDog/datadog-agent/comp/core/tagger/mock" "github.com/DataDog/datadog-agent/pkg/clusteragent/clusterchecks/types" "github.com/DataDog/datadog-agent/pkg/config/env" configmock "github.com/DataDog/datadog-agent/pkg/config/mock" @@ -49,7 +50,8 @@ func extractCheckNames(configs []integration.Config) []string { } func TestScheduleUnschedule(t *testing.T) { - dispatcher := newDispatcher() + fakeTagger := mock.SetupFakeTagger(t) + dispatcher := newDispatcher(fakeTagger) stored, err := dispatcher.getAllConfigs() assert.NoError(t, err) assert.Len(t, stored, 0) @@ -80,7 +82,8 @@ func TestScheduleUnschedule(t *testing.T) { } func TestScheduleUnscheduleEndpoints(t *testing.T) { - dispatcher := newDispatcher() + fakeTagger := mock.SetupFakeTagger(t) + dispatcher := newDispatcher(fakeTagger) config1 := generateIntegration("cluster-check") config2 := generateEndpointsIntegration("endpoints-check1", "node1", false) @@ -96,7 +99,8 @@ func TestScheduleUnscheduleEndpoints(t *testing.T) { } func TestExcludeEndpoint(t *testing.T) { - dispatcher := newDispatcher() + fakeTagger := mock.SetupFakeTagger(t) + dispatcher := newDispatcher(fakeTagger) config1 := generateEndpointsIntegration("endpoints-check1", "node1", true) config2 := generateEndpointsIntegration("endpoints-check2", "node2", false) @@ -109,7 +113,8 @@ func TestExcludeEndpoint(t *testing.T) { } func 
TestScheduleReschedule(t *testing.T) { - dispatcher := newDispatcher() + fakeTagger := mock.SetupFakeTagger(t) + dispatcher := newDispatcher(fakeTagger) config := generateIntegration("cluster-check") // Register to node1 @@ -141,7 +146,8 @@ func TestScheduleReschedule(t *testing.T) { } func TestScheduleRescheduleEndpoints(t *testing.T) { - dispatcher := newDispatcher() + fakeTagger := mock.SetupFakeTagger(t) + dispatcher := newDispatcher(fakeTagger) config := generateEndpointsIntegration("endpoints-check1", "node1", false) // Register to node1 @@ -172,7 +178,8 @@ func TestScheduleRescheduleEndpoints(t *testing.T) { } func TestDescheduleRescheduleSameNode(t *testing.T) { - dispatcher := newDispatcher() + fakeTagger := mock.SetupFakeTagger(t) + dispatcher := newDispatcher(fakeTagger) config := generateIntegration("cluster-check") // Schedule to node1 @@ -205,7 +212,8 @@ func TestDescheduleRescheduleSameNode(t *testing.T) { } func TestProcessNodeStatus(t *testing.T) { - dispatcher := newDispatcher() + fakeTagger := mock.SetupFakeTagger(t) + dispatcher := newDispatcher(fakeTagger) status1 := types.NodeStatus{LastChange: 10} // Warmup phase, upToDate is unconditionally true @@ -246,7 +254,8 @@ func TestProcessNodeStatus(t *testing.T) { } func TestGetNodeWithLessChecks(t *testing.T) { - dispatcher := newDispatcher() + fakeTagger := mock.SetupFakeTagger(t) + dispatcher := newDispatcher(fakeTagger) // No node registered -> empty string assert.Equal(t, "", dispatcher.getNodeWithLessChecks()) @@ -270,7 +279,8 @@ func TestGetNodeWithLessChecks(t *testing.T) { } func TestExpireNodes(t *testing.T) { - dispatcher := newDispatcher() + fakeTagger := mock.SetupFakeTagger(t) + dispatcher := newDispatcher(fakeTagger) // Node with no status (bug ?), handled by expiration dispatcher.addConfig(generateIntegration("one"), "node1") @@ -302,7 +312,8 @@ func TestExpireNodes(t *testing.T) { func TestRescheduleDanglingFromExpiredNodes(t *testing.T) { // This test case can represent a rollout 
of the cluster check workers - dispatcher := newDispatcher() + fakeTagger := mock.SetupFakeTagger(t) + dispatcher := newDispatcher(fakeTagger) // Register a node with a correct status & schedule a Check dispatcher.processNodeStatus("nodeA", "10.0.0.1", types.NodeStatus{}) @@ -352,7 +363,8 @@ func TestRescheduleDanglingFromExpiredNodes(t *testing.T) { } func TestDispatchFourConfigsTwoNodes(t *testing.T) { - dispatcher := newDispatcher() + fakeTagger := mock.SetupFakeTagger(t) + dispatcher := newDispatcher(fakeTagger) // Register two nodes dispatcher.processNodeStatus("nodeA", "10.0.0.1", types.NodeStatus{}) @@ -389,7 +401,8 @@ func TestDispatchFourConfigsTwoNodes(t *testing.T) { } func TestDanglingConfig(t *testing.T) { - dispatcher := newDispatcher() + fakeTagger := mock.SetupFakeTagger(t) + dispatcher := newDispatcher(fakeTagger) config := integration.Config{ Name: "cluster-check", ClusterCheck: true, @@ -416,7 +429,8 @@ func TestDanglingConfig(t *testing.T) { } func TestUnscheduleDanglingConfig(t *testing.T) { - testDispatcher := newDispatcher() + fakeTagger := mock.SetupFakeTagger(t) + testDispatcher := newDispatcher(fakeTagger) testConfig := integration.Config{ Name: "cluster-check-example", @@ -446,7 +460,8 @@ func TestUnscheduleDanglingConfig(t *testing.T) { } func TestReset(t *testing.T) { - dispatcher := newDispatcher() + fakeTagger := mock.SetupFakeTagger(t) + dispatcher := newDispatcher(fakeTagger) config := generateIntegration("cluster-check") // Register to node1 @@ -480,10 +495,11 @@ func TestPatchConfiguration(t *testing.T) { } initialDigest := checkConfig.Digest() + fakeTagger := mock.SetupFakeTagger(t) mockConfig := configmock.New(t) mockConfig.SetWithoutSource("cluster_name", "testing") clustername.ResetClusterName() - dispatcher := newDispatcher() + dispatcher := newDispatcher(fakeTagger) out, err := dispatcher.patchConfiguration(checkConfig) assert.NoError(t, err) @@ -517,10 +533,11 @@ func TestPatchEndpointsConfiguration(t *testing.T) { 
LogsConfig: integration.Data("[{\"service\":\"any_service\",\"source\":\"any_source\"}]"), } + fakeTagger := mock.SetupFakeTagger(t) mockConfig := configmock.New(t) mockConfig.SetWithoutSource("cluster_name", "testing") clustername.ResetClusterName() - dispatcher := newDispatcher() + dispatcher := newDispatcher(fakeTagger) out, err := dispatcher.patchEndpointsConfiguration(checkConfig) assert.NoError(t, err) @@ -555,20 +572,22 @@ func TestExtraTags(t *testing.T) { {[]string{"one", "two"}, "mycluster", "custom_name", []string{"one", "two", "custom_name:mycluster", "kube_cluster_name:mycluster"}}, } { t.Run("", func(t *testing.T) { + fakeTagger := mock.SetupFakeTagger(t) mockConfig := configmock.New(t) - mockConfig.SetWithoutSource("cluster_checks.extra_tags", tc.extraTagsConfig) + fakeTagger.SetGlobalTags(tc.extraTagsConfig, []string{}, []string{}, []string{}) mockConfig.SetWithoutSource("cluster_name", tc.clusterNameConfig) mockConfig.SetWithoutSource("cluster_checks.cluster_tag_name", tc.tagNameConfig) clustername.ResetClusterName() - dispatcher := newDispatcher() + dispatcher := newDispatcher(fakeTagger) assert.EqualValues(t, tc.expected, dispatcher.extraTags) }) } } func TestGetAllEndpointsCheckConfigs(t *testing.T) { - dispatcher := newDispatcher() + fakeTagger := mock.SetupFakeTagger(t) + dispatcher := newDispatcher(fakeTagger) // Register configs to different nodes dispatcher.addEndpointConfig(generateEndpointsIntegration("endpoints-check1", "node1", false), "node1") @@ -642,10 +661,11 @@ func (d *dummyClientStruct) GetRunnerWorkers(IP string) (types.Workers, error) { } func TestUpdateRunnersStats(t *testing.T) { + fakeTagger := mock.SetupFakeTagger(t) mockConfig := configmock.New(t) mockConfig.SetWithoutSource("cluster_checks.rebalance_with_utilization", true) - dispatcher := newDispatcher() + dispatcher := newDispatcher(fakeTagger) status := types.NodeStatus{LastChange: 10} dispatcher.store.active = true diff --git 
a/pkg/clusteragent/clusterchecks/handler.go b/pkg/clusteragent/clusterchecks/handler.go index fff45ba64e3d1..27edab2ff6d43 100644 --- a/pkg/clusteragent/clusterchecks/handler.go +++ b/pkg/clusteragent/clusterchecks/handler.go @@ -14,6 +14,7 @@ import ( "time" "github.com/DataDog/datadog-agent/comp/core/autodiscovery/scheduler" + tagger "github.com/DataDog/datadog-agent/comp/core/tagger/def" "github.com/DataDog/datadog-agent/pkg/clusteragent/api" "github.com/DataDog/datadog-agent/pkg/clusteragent/clusterchecks/types" pkgconfigsetup "github.com/DataDog/datadog-agent/pkg/config/setup" @@ -59,7 +60,7 @@ type Handler struct { // NewHandler returns a populated Handler // It will hook on the specified AutoConfig instance at Start -func NewHandler(ac pluggableAutoConfig) (*Handler, error) { +func NewHandler(ac pluggableAutoConfig, tagger tagger.Component) (*Handler, error) { if ac == nil { return nil, errors.New("empty autoconfig object") } @@ -68,7 +69,7 @@ func NewHandler(ac pluggableAutoConfig) (*Handler, error) { leaderStatusFreq: 5 * time.Second, warmupDuration: pkgconfigsetup.Datadog().GetDuration("cluster_checks.warmup_duration") * time.Second, leadershipChan: make(chan state, 1), - dispatcher: newDispatcher(), + dispatcher: newDispatcher(tagger), } if pkgconfigsetup.Datadog().GetBool("leader_election") { diff --git a/pkg/clusteragent/clusterchecks/handler_test.go b/pkg/clusteragent/clusterchecks/handler_test.go index bbf348e102674..c1b0c074e6aaf 100644 --- a/pkg/clusteragent/clusterchecks/handler_test.go +++ b/pkg/clusteragent/clusterchecks/handler_test.go @@ -20,6 +20,7 @@ import ( "github.com/stretchr/testify/mock" "github.com/DataDog/datadog-agent/comp/core/autodiscovery/integration" + taggerMock "github.com/DataDog/datadog-agent/comp/core/tagger/mock" "github.com/DataDog/datadog-agent/pkg/clusteragent/api" "github.com/DataDog/datadog-agent/pkg/clusteragent/clusterchecks/types" "github.com/DataDog/datadog-agent/pkg/util/testutil" @@ -138,6 +139,7 @@ func 
TestUpdateLeaderIP(t *testing.T) { func TestHandlerRun(t *testing.T) { dummyT := &testing.T{} ac := &mockedPluggableAutoConfig{} + fakeTagger := taggerMock.SetupFakeTagger(t) ac.Test(t) le := &fakeLeaderEngine{ err: errors.New("failing"), @@ -154,7 +156,7 @@ func TestHandlerRun(t *testing.T) { leaderStatusFreq: 100 * time.Millisecond, warmupDuration: 250 * time.Millisecond, leadershipChan: make(chan state, 1), - dispatcher: newDispatcher(), + dispatcher: newDispatcher(fakeTagger), leaderStatusCallback: le.get, leaderForwarder: api.NewLeaderForwarder(testPort, 10), } diff --git a/pkg/collector/corechecks/sbom/processor.go b/pkg/collector/corechecks/sbom/processor.go index 0be97cf271cb3..77c1e54a796e4 100644 --- a/pkg/collector/corechecks/sbom/processor.go +++ b/pkg/collector/corechecks/sbom/processor.go @@ -77,6 +77,7 @@ func newProcessor(workloadmetaStore workloadmeta.Component, sender sender.Sender } sender.EventPlatformEvent(encoded, eventplatform.EventTypeContainerSBOM) + log.Debugf("SBOM event sent with %d entities", len(entities)) }), workloadmetaStore: workloadmetaStore, tagger: tagger, @@ -94,24 +95,39 @@ func (p *processor) processContainerImagesEvents(evBundle workloadmeta.EventBund log.Tracef("Processing %d events", len(evBundle.Events)) + // Separate events into images and containers + var imageEvents []workloadmeta.Event + var containerEvents []workloadmeta.Event + for _, event := range evBundle.Events { - switch event.Entity.GetID().Kind { + entityID := event.Entity.GetID() + switch entityID.Kind { case workloadmeta.KindContainerImageMetadata: - switch event.Type { - case workloadmeta.EventTypeSet: - p.registerImage(event.Entity.(*workloadmeta.ContainerImageMetadata)) - p.processImageSBOM(event.Entity.(*workloadmeta.ContainerImageMetadata)) - case workloadmeta.EventTypeUnset: - p.unregisterImage(event.Entity.(*workloadmeta.ContainerImageMetadata)) - // Let the SBOM expire on back-end side - } + imageEvents = append(imageEvents, event) case 
workloadmeta.KindContainer: - switch event.Type { - case workloadmeta.EventTypeSet: - p.registerContainer(event.Entity.(*workloadmeta.Container)) - case workloadmeta.EventTypeUnset: - p.unregisterContainer(event.Entity.(*workloadmeta.Container)) - } + containerEvents = append(containerEvents, event) + } + } + + // Process all image events first + for _, event := range imageEvents { + switch event.Type { + case workloadmeta.EventTypeSet: + p.registerImage(event.Entity.(*workloadmeta.ContainerImageMetadata)) + p.processImageSBOM(event.Entity.(*workloadmeta.ContainerImageMetadata)) + case workloadmeta.EventTypeUnset: + p.unregisterImage(event.Entity.(*workloadmeta.ContainerImageMetadata)) + // Let the SBOM expire on back-end side + } + } + + // Process all container events after images + for _, event := range containerEvents { + switch event.Type { + case workloadmeta.EventTypeSet: + p.registerContainer(event.Entity.(*workloadmeta.Container)) + case workloadmeta.EventTypeUnset: + p.unregisterContainer(event.Entity.(*workloadmeta.Container)) } } } diff --git a/pkg/collector/corechecks/servicediscovery/apm/detect_nix_test.go b/pkg/collector/corechecks/servicediscovery/apm/detect_nix_test.go index 2eba255192838..898e1dcaa9c30 100644 --- a/pkg/collector/corechecks/servicediscovery/apm/detect_nix_test.go +++ b/pkg/collector/corechecks/servicediscovery/apm/detect_nix_test.go @@ -307,6 +307,7 @@ func TestGoDetector(t *testing.T) { require.NoError(t, cmd.Start()) t.Cleanup(func() { _ = cmd.Process.Kill() + cmd.Wait() }) require.Eventually(t, func() bool { if cmd.Process.Pid == 0 { diff --git a/pkg/collector/corechecks/servicediscovery/apm/testutil/instrumented/instrumented.go b/pkg/collector/corechecks/servicediscovery/apm/testutil/instrumented/instrumented.go index 74b92599e66cb..6616ec99fefcb 100644 --- a/pkg/collector/corechecks/servicediscovery/apm/testutil/instrumented/instrumented.go +++ 
b/pkg/collector/corechecks/servicediscovery/apm/testutil/instrumented/instrumented.go @@ -8,12 +8,29 @@ package main import ( - "time" + "fmt" + "os" + "os/signal" + "syscall" "gopkg.in/DataDog/dd-trace-go.v1/ddtrace/tracer" ) func main() { tracer.Start() - time.Sleep(time.Second * 20) + + signalChan := make(chan os.Signal, 1) + done := make(chan bool, 1) + + signal.Notify(signalChan, os.Interrupt, syscall.SIGTERM, syscall.SIGINT) + + go func() { + <-signalChan + done <- true + }() + + fmt.Println("Running... Press Ctrl+C to exit.") + + <-done // Block until a signal is received + fmt.Println("Gracefully shutting down.") } diff --git a/pkg/collector/corechecks/servicediscovery/apm/testutil/instrumented2/instrumented2.go b/pkg/collector/corechecks/servicediscovery/apm/testutil/instrumented2/instrumented2.go index b52588d9684a8..8b78293ec5bbf 100644 --- a/pkg/collector/corechecks/servicediscovery/apm/testutil/instrumented2/instrumented2.go +++ b/pkg/collector/corechecks/servicediscovery/apm/testutil/instrumented2/instrumented2.go @@ -9,7 +9,9 @@ package main import ( "fmt" - "time" + "os" + "os/signal" + "syscall" "github.com/DataDog/dd-trace-go/v2/ddtrace/tracer" ) @@ -20,5 +22,19 @@ func main() { fmt.Println(err) } - time.Sleep(time.Second * 20) + // Create a channel to listen for OS signals + signalChan := make(chan os.Signal, 1) + done := make(chan bool, 1) + + signal.Notify(signalChan, os.Interrupt, syscall.SIGTERM, syscall.SIGINT) + + go func() { + <-signalChan + done <- true + }() + + fmt.Println("Running... 
Press Ctrl+C to exit.") + + <-done // Block until a signal is received + fmt.Println("Gracefully shutting down.") } diff --git a/pkg/collector/corechecks/servicediscovery/impl_linux.go b/pkg/collector/corechecks/servicediscovery/impl_linux.go index 30f4230425349..c85e66d380a1a 100644 --- a/pkg/collector/corechecks/servicediscovery/impl_linux.go +++ b/pkg/collector/corechecks/servicediscovery/impl_linux.go @@ -104,6 +104,8 @@ func (li *linuxImpl) DiscoverServices() (*discoveredServices, error) { svc.LastHeartbeat = now svc.service.RSS = service.RSS svc.service.CPUCores = service.CPUCores + svc.service.ContainerID = service.ContainerID + svc.service.GeneratedName = service.GeneratedName events.heartbeat = append(events.heartbeat, *svc) } } @@ -139,6 +141,8 @@ func (li *linuxImpl) handlePotentialServices(events *serviceEvents, now time.Tim svc.LastHeartbeat = now svc.service.RSS = service.RSS svc.service.CPUCores = service.CPUCores + svc.service.ContainerID = service.ContainerID + svc.service.GeneratedName = service.GeneratedName li.aliveServices[pid] = svc events.start = append(events.start, *svc) diff --git a/pkg/collector/corechecks/servicediscovery/model/model.go b/pkg/collector/corechecks/servicediscovery/model/model.go index df67422cb089e..f45cbae150eb4 100644 --- a/pkg/collector/corechecks/servicediscovery/model/model.go +++ b/pkg/collector/corechecks/servicediscovery/model/model.go @@ -8,19 +8,20 @@ package model // Service represents a listening process. 
type Service struct { - PID int `json:"pid"` - Name string `json:"name"` - GeneratedName string `json:"generated_name"` - DDService string `json:"dd_service"` - DDServiceInjected bool `json:"dd_service_injected"` - Ports []uint16 `json:"ports"` - APMInstrumentation string `json:"apm_instrumentation"` - Language string `json:"language"` - RSS uint64 `json:"rss"` - CommandLine []string `json:"cmdline"` - StartTimeMilli uint64 `json:"start_time"` - CPUCores float64 `json:"cpu_cores"` - ContainerID string `json:"container_id"` + PID int `json:"pid"` + Name string `json:"name"` + GeneratedName string `json:"generated_name"` + DDService string `json:"dd_service"` + DDServiceInjected bool `json:"dd_service_injected"` + CheckedContainerData bool `json:"checked_container_data"` + Ports []uint16 `json:"ports"` + APMInstrumentation string `json:"apm_instrumentation"` + Language string `json:"language"` + RSS uint64 `json:"rss"` + CommandLine []string `json:"cmdline"` + StartTimeMilli uint64 `json:"start_time"` + CPUCores float64 `json:"cpu_cores"` + ContainerID string `json:"container_id"` } // ServicesResponse is the response for the system-probe /discovery/services endpoint. diff --git a/pkg/collector/corechecks/servicediscovery/module/impl_linux.go b/pkg/collector/corechecks/servicediscovery/module/impl_linux.go index 70cee8a18beee..dd02e8a13c2f4 100644 --- a/pkg/collector/corechecks/servicediscovery/module/impl_linux.go +++ b/pkg/collector/corechecks/servicediscovery/module/impl_linux.go @@ -20,6 +20,7 @@ import ( "sync" "time" + agentPayload "github.com/DataDog/agent-payload/v5/process" "github.com/shirou/gopsutil/v3/process" "github.com/DataDog/datadog-agent/cmd/system-probe/api/module" @@ -48,15 +49,16 @@ var _ module.Module = &discovery{} // serviceInfo holds process data that should be cached between calls to the // endpoint. 
type serviceInfo struct { - generatedName string - ddServiceName string - ddServiceInjected bool - language language.Language - apmInstrumentation apm.Instrumentation - cmdLine []string - startTimeMilli uint64 - cpuTime uint64 - cpuUsage float64 + generatedName string + ddServiceName string + ddServiceInjected bool + checkedContainerData bool + language language.Language + apmInstrumentation apm.Instrumentation + cmdLine []string + startTimeMilli uint64 + cpuTime uint64 + cpuUsage float64 } // discovery is an implementation of the Module interface for the discovery module. @@ -551,18 +553,19 @@ func (s *discovery) getService(context parsingContext, pid int32) *model.Service } return &model.Service{ - PID: int(pid), - Name: name, - GeneratedName: info.generatedName, - DDService: info.ddServiceName, - DDServiceInjected: info.ddServiceInjected, - Ports: ports, - APMInstrumentation: string(info.apmInstrumentation), - Language: string(info.language), - RSS: rss, - CommandLine: info.cmdLine, - StartTimeMilli: info.startTimeMilli, - CPUCores: info.cpuUsage, + PID: int(pid), + Name: name, + GeneratedName: info.generatedName, + DDService: info.ddServiceName, + DDServiceInjected: info.ddServiceInjected, + CheckedContainerData: info.checkedContainerData, + Ports: ports, + APMInstrumentation: string(info.apmInstrumentation), + Language: string(info.language), + RSS: rss, + CommandLine: info.cmdLine, + StartTimeMilli: info.startTimeMilli, + CPUCores: info.cpuUsage, } } @@ -613,6 +616,89 @@ func (s *discovery) updateServicesCPUStats(services []model.Service) error { return nil } +func getServiceNameFromContainerTags(tags []string) string { + // The tags we look for service name generation, in their priority order. + // The map entries will be filled as we go through the containers tags. 
+ tagsPriority := []struct { + tagName string + tagValue *string + }{ + {"service", nil}, + {"app", nil}, + {"short_image", nil}, + {"kube_container_name", nil}, + {"kube_deployment", nil}, + {"kube_service", nil}, + } + + for _, tag := range tags { + // Get index of separator between name and value + sepIndex := strings.IndexRune(tag, ':') + if sepIndex < 0 || sepIndex >= len(tag)-1 { + // Malformed tag; we skip it + continue + } + + for i := range tagsPriority { + if tag[:sepIndex] != tagsPriority[i].tagName { + // Not a tag we care about; we skip it + continue + } + + value := tag[sepIndex+1:] + tagsPriority[i].tagValue = &value + break + } + } + + for _, tag := range tagsPriority { + if tag.tagValue == nil { + continue + } + + log.Debugf("Using %v:%v tag for service name", tag.tagName, *tag.tagValue) + return *tag.tagValue + } + + return "" +} + +func (s *discovery) enrichContainerData(service *model.Service, containers map[string]*agentPayload.Container, pidToCid map[int]string) { + id, ok := pidToCid[service.PID] + if !ok { + return + } + + service.ContainerID = id + + // We got the service name from container tags before, no need to do it again. + if service.CheckedContainerData { + return + } + + container, ok := containers[id] + if !ok { + return + } + + serviceName := getServiceNameFromContainerTags(container.Tags) + + if serviceName != "" { + service.GeneratedName = serviceName + } + service.CheckedContainerData = true + + s.mux.Lock() + serviceInfo, ok := s.cache[int32(service.PID)] + if ok { + if serviceName != "" { + serviceInfo.generatedName = serviceName + } + serviceInfo.checkedContainerData = true + } + s.mux.Unlock() +} + // getStatus returns the list of currently running services. 
func (s *discovery) getServices() (*[]model.Service, error) { procRoot := kernel.ProcFSRoot() @@ -628,11 +714,18 @@ func (s *discovery) getServices() (*[]model.Service, error) { var services []model.Service alivePids := make(map[int32]struct{}, len(pids)) - _, _, pidToCid, err := s.containerProvider.GetContainers(1*time.Minute, nil) + containers, _, pidToCid, err := s.containerProvider.GetContainers(1*time.Minute, nil) if err != nil { log.Errorf("could not get containers: %s", err) } + // Build mapping of Container ID to container object to avoid traversal of + // the containers slice for every service. + containersMap := make(map[string]*agentPayload.Container, len(containers)) + for _, c := range containers { + containersMap[c.Id] = c + } + for _, pid := range pids { alivePids[pid] = struct{}{} @@ -640,10 +733,7 @@ func (s *discovery) getServices() (*[]model.Service, error) { if service == nil { continue } - - if id, ok := pidToCid[service.PID]; ok { - service.ContainerID = id - } + s.enrichContainerData(service, containersMap, pidToCid) services = append(services, *service) } diff --git a/pkg/collector/corechecks/servicediscovery/module/impl_linux_test.go b/pkg/collector/corechecks/servicediscovery/module/impl_linux_test.go index 6d5610c952a24..a8154ea1b7925 100644 --- a/pkg/collector/corechecks/servicediscovery/module/impl_linux_test.go +++ b/pkg/collector/corechecks/servicediscovery/module/impl_linux_test.go @@ -28,6 +28,7 @@ import ( "testing" "time" + agentPayload "github.com/DataDog/agent-payload/v5/process" "github.com/golang/mock/gomock" gorillamux "github.com/gorilla/mux" "github.com/prometheus/procfs" @@ -812,9 +813,21 @@ func TestDocker(t *testing.T) { } if comm == "python-1111" { pid1111 = process.PID - mockContainerProvider.EXPECT().GetContainers(1*time.Minute, nil).Return(nil, nil, map[int]string{ - pid1111: "dummyCID", - }, nil) + mockContainerProvider. + EXPECT(). + GetContainers(1*time.Minute, nil). 
+ Return( + []*agentPayload.Container{ + {Id: "dummyCID", Tags: []string{ + "sometag:somevalue", + "kube_service:kube_foo", // Should not have priority compared to app tag, for service naming + "app:foo_from_app_tag", + }}, + }, + nil, + map[int]string{ + pid1111: "dummyCID", + }, nil) break } @@ -827,6 +840,7 @@ func TestDocker(t *testing.T) { require.Contains(t, portMap, pid1111) require.Contains(t, portMap[pid1111].Ports, uint16(1234)) require.Contains(t, portMap[pid1111].ContainerID, "dummyCID") + require.Contains(t, portMap[pid1111].GeneratedName, "foo_from_app_tag") } // Check that the cache is cleaned when procceses die. @@ -895,6 +909,113 @@ func TestCache(t *testing.T) { require.Empty(t, discovery.cache) } +func TestTagsPriority(t *testing.T) { + cases := []struct { + name string + tags []string + expectedServiceName string + }{ + { + "nil tag list", + nil, + "", + }, + { + "empty tag list", + []string{}, + "", + }, + { + "no useful tags", + []string{"foo:bar"}, + "", + }, + { + "malformed tag", + []string{"foobar"}, + "", + }, + { + "service tag", + []string{"service:foo"}, + "foo", + }, + { + "app tag", + []string{"app:foo"}, + "foo", + }, + { + "short_image tag", + []string{"short_image:foo"}, + "foo", + }, + { + "kube_container_name tag", + []string{"kube_container_name:foo"}, + "foo", + }, + { + "kube_deployment tag", + []string{"kube_deployment:foo"}, + "foo", + }, + { + "kube_service tag", + []string{"kube_service:foo"}, + "foo", + }, + { + "multiple tags", + []string{ + "foo:bar", + "baz:biz", + "service:my_service", + "malformed", + }, + "my_service", + }, + { + "empty value", + []string{ + "service:", + "app:foo", + }, + "foo", + }, + { + "multiple tags with priority", + []string{ + "foo:bar", + "short_image:my_image", + "baz:biz", + "service:my_service", + "malformed", + }, + "my_service", + }, + { + "all priority tags", + []string{ + "kube_service:my_kube_service", + "kube_deployment:my_kube_deployment", + 
"kube_container_name:my_kube_container_name", + "short_iamge:my_short_image", + "app:my_app", + "service:my_service", + }, + "my_service", + }, + } + + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + name := getServiceNameFromContainerTags(c.tags) + require.Equalf(t, c.expectedServiceName, name, "got wrong service name from container tags") + }) + } +} + func BenchmarkOldProcess(b *testing.B) { b.ResetTimer() b.ReportAllocs() diff --git a/pkg/collector/runner/runner.go b/pkg/collector/runner/runner.go index e6ba495f02f2b..a388e73dea776 100644 --- a/pkg/collector/runner/runner.go +++ b/pkg/collector/runner/runner.go @@ -14,6 +14,7 @@ import ( "go.uber.org/atomic" + haagent "github.com/DataDog/datadog-agent/comp/haagent/def" "github.com/DataDog/datadog-agent/pkg/aggregator/sender" "github.com/DataDog/datadog-agent/pkg/collector/check" checkid "github.com/DataDog/datadog-agent/pkg/collector/check/id" @@ -40,6 +41,7 @@ var ( // Runner is the object in charge of running all the checks type Runner struct { senderManager sender.SenderManager + haAgent haagent.Component isRunning *atomic.Bool id int // Globally unique identifier for the Runner workers map[int]*worker.Worker // Workers currrently under this Runner's management @@ -52,11 +54,12 @@ type Runner struct { } // NewRunner takes the number of desired goroutines processing incoming checks. 
-func NewRunner(senderManager sender.SenderManager) *Runner { +func NewRunner(senderManager sender.SenderManager, haAgent haagent.Component) *Runner { numWorkers := pkgconfigsetup.Datadog().GetInt("check_runners") r := &Runner{ senderManager: senderManager, + haAgent: haAgent, id: int(runnerIDGenerator.Inc()), isRunning: atomic.NewBool(true), workers: make(map[int]*worker.Worker), @@ -117,6 +120,7 @@ func (r *Runner) AddWorker() { func (r *Runner) newWorker() (*worker.Worker, error) { worker, err := worker.NewWorker( r.senderManager, + r.haAgent, r.id, int(workerIDGenerator.Inc()), r.pendingChecksChan, diff --git a/pkg/collector/runner/runner_test.go b/pkg/collector/runner/runner_test.go index 6ae3cd335c81b..b6a02aab0739f 100644 --- a/pkg/collector/runner/runner_test.go +++ b/pkg/collector/runner/runner_test.go @@ -15,6 +15,7 @@ import ( "github.com/stretchr/testify/require" "go.uber.org/atomic" + haagentmock "github.com/DataDog/datadog-agent/comp/haagent/mock" "github.com/DataDog/datadog-agent/pkg/aggregator" checkid "github.com/DataDog/datadog-agent/pkg/collector/check/id" "github.com/DataDog/datadog-agent/pkg/collector/check/stub" @@ -152,7 +153,7 @@ func TestNewRunner(t *testing.T) { testSetUp(t) pkgconfigsetup.Datadog().SetWithoutSource("check_runners", "3") - r := NewRunner(aggregator.NewNoOpSenderManager()) + r := NewRunner(aggregator.NewNoOpSenderManager(), haagentmock.NewMockHaAgent()) require.NotNil(t, r) defer r.Stop() @@ -166,7 +167,7 @@ func TestRunnerAddWorker(t *testing.T) { testSetUp(t) pkgconfigsetup.Datadog().SetWithoutSource("check_runners", "1") - r := NewRunner(aggregator.NewNoOpSenderManager()) + r := NewRunner(aggregator.NewNoOpSenderManager(), haagentmock.NewMockHaAgent()) require.NotNil(t, r) defer r.Stop() @@ -181,7 +182,7 @@ func TestRunnerStaticUpdateNumWorkers(t *testing.T) { testSetUp(t) pkgconfigsetup.Datadog().SetWithoutSource("check_runners", "2") - r := NewRunner(aggregator.NewNoOpSenderManager()) + r := 
NewRunner(aggregator.NewNoOpSenderManager(), haagentmock.NewMockHaAgent()) require.NotNil(t, r) defer func() { r.Stop() @@ -212,7 +213,7 @@ func TestRunnerDynamicUpdateNumWorkers(t *testing.T) { assertAsyncWorkerCount(t, 0) min, max, expectedWorkers := testCase[0], testCase[1], testCase[2] - r := NewRunner(aggregator.NewNoOpSenderManager()) + r := NewRunner(aggregator.NewNoOpSenderManager(), haagentmock.NewMockHaAgent()) require.NotNil(t, r) for checks := min; checks <= max; checks++ { @@ -234,7 +235,7 @@ func TestRunner(t *testing.T) { checks[idx] = newCheck(t, fmt.Sprintf("mycheck_%d:123", idx), false, nil) } - r := NewRunner(aggregator.NewNoOpSenderManager()) + r := NewRunner(aggregator.NewNoOpSenderManager(), haagentmock.NewMockHaAgent()) require.NotNil(t, r) defer r.Stop() @@ -262,7 +263,7 @@ func TestRunnerStop(t *testing.T) { checks[idx].RunLock.Lock() } - r := NewRunner(aggregator.NewNoOpSenderManager()) + r := NewRunner(aggregator.NewNoOpSenderManager(), haagentmock.NewMockHaAgent()) require.NotNil(t, r) defer r.Stop() @@ -320,7 +321,7 @@ func TestRunnerStopWithStuckCheck(t *testing.T) { blockedCheck.RunLock.Lock() blockedCheck.StopLock.Lock() - r := NewRunner(aggregator.NewNoOpSenderManager()) + r := NewRunner(aggregator.NewNoOpSenderManager(), haagentmock.NewMockHaAgent()) require.NotNil(t, r) defer r.Stop() @@ -369,7 +370,7 @@ func TestRunnerStopCheck(t *testing.T) { blockedCheck.RunLock.Lock() blockedCheck.StopLock.Lock() - r := NewRunner(aggregator.NewNoOpSenderManager()) + r := NewRunner(aggregator.NewNoOpSenderManager(), haagentmock.NewMockHaAgent()) require.NotNil(t, r) defer func() { r.Stop() @@ -413,7 +414,7 @@ func TestRunnerScheduler(t *testing.T) { sched1 := newScheduler() sched2 := newScheduler() - r := NewRunner(aggregator.NewNoOpSenderManager()) + r := NewRunner(aggregator.NewNoOpSenderManager(), haagentmock.NewMockHaAgent()) require.NotNil(t, r) defer r.Stop() @@ -433,7 +434,7 @@ func TestRunnerShouldAddCheckStats(t *testing.T) { testCheck 
:= newCheck(t, "test", false, nil) sched := newScheduler() - r := NewRunner(aggregator.NewNoOpSenderManager()) + r := NewRunner(aggregator.NewNoOpSenderManager(), haagentmock.NewMockHaAgent()) require.NotNil(t, r) defer r.Stop() diff --git a/pkg/collector/worker/worker.go b/pkg/collector/worker/worker.go index 00a0b40668135..144ecf58240f2 100644 --- a/pkg/collector/worker/worker.go +++ b/pkg/collector/worker/worker.go @@ -10,6 +10,7 @@ import ( "fmt" "time" + haagent "github.com/DataDog/datadog-agent/comp/haagent/def" "github.com/DataDog/datadog-agent/pkg/aggregator/sender" "github.com/DataDog/datadog-agent/pkg/collector/check" checkid "github.com/DataDog/datadog-agent/pkg/collector/check/id" @@ -53,11 +54,13 @@ type Worker struct { runnerID int shouldAddCheckStatsFunc func(id checkid.ID) bool utilizationTickInterval time.Duration + haAgent haagent.Component } // NewWorker returns an instance of a `Worker` after parameter sanity checks are passed func NewWorker( senderManager sender.SenderManager, + haAgent haagent.Component, runnerID int, ID int, pendingChecksChan chan check.Check, @@ -84,6 +87,7 @@ func NewWorker( checksTracker, shouldAddCheckStatsFunc, senderManager.GetDefaultSender, + haAgent, pollingInterval, ) } @@ -98,6 +102,7 @@ func newWorkerWithOptions( checksTracker *tracker.RunningChecksTracker, shouldAddCheckStatsFunc func(id checkid.ID) bool, getDefaultSenderFunc func() (sender.Sender, error), + haAgent haagent.Component, utilizationTickInterval time.Duration, ) (*Worker, error) { @@ -115,6 +120,7 @@ func newWorkerWithOptions( runnerID: runnerID, shouldAddCheckStatsFunc: shouldAddCheckStatsFunc, getDefaultSenderFunc: getDefaultSenderFunc, + haAgent: haAgent, utilizationTickInterval: utilizationTickInterval, }, nil } @@ -135,6 +141,11 @@ func (w *Worker) Run() { checkLogger := CheckLogger{Check: check} longRunning := check.Interval() == 0 + if !w.haAgent.ShouldRunIntegration(check.String()) { + checkLogger.Debug("Check is an HA integration and current 
agent is not leader, skipping execution...") + continue + } + // Add check to tracker if it's not already running if !w.checksTracker.AddCheck(check) { checkLogger.Debug("Check is already running, skipping execution...") diff --git a/pkg/collector/worker/worker_test.go b/pkg/collector/worker/worker_test.go index 129ed30a499e8..3bbd9e659e224 100644 --- a/pkg/collector/worker/worker_test.go +++ b/pkg/collector/worker/worker_test.go @@ -15,7 +15,12 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "go.uber.org/atomic" + "go.uber.org/fx" + "github.com/DataDog/datadog-agent/comp/core/config" + logmock "github.com/DataDog/datadog-agent/comp/core/log/mock" + haagentimpl "github.com/DataDog/datadog-agent/comp/haagent/impl" + haagentmock "github.com/DataDog/datadog-agent/comp/haagent/mock" "github.com/DataDog/datadog-agent/pkg/aggregator" "github.com/DataDog/datadog-agent/pkg/aggregator/mocksender" "github.com/DataDog/datadog-agent/pkg/aggregator/sender" @@ -26,6 +31,7 @@ import ( "github.com/DataDog/datadog-agent/pkg/collector/runner/tracker" pkgconfigsetup "github.com/DataDog/datadog-agent/pkg/config/setup" "github.com/DataDog/datadog-agent/pkg/metrics/servicecheck" + "github.com/DataDog/datadog-agent/pkg/util/fxutil" ) type testCheck struct { @@ -122,16 +128,16 @@ func TestWorkerInit(t *testing.T) { mockShouldAddStatsFunc := func(checkid.ID) bool { return true } senderManager := aggregator.NewNoOpSenderManager() - _, err := NewWorker(senderManager, 1, 2, nil, checksTracker, mockShouldAddStatsFunc) + _, err := NewWorker(senderManager, haagentmock.NewMockHaAgent(), 1, 2, nil, checksTracker, mockShouldAddStatsFunc) require.NotNil(t, err) - _, err = NewWorker(senderManager, 1, 2, pendingChecksChan, nil, mockShouldAddStatsFunc) + _, err = NewWorker(senderManager, haagentmock.NewMockHaAgent(), 1, 2, pendingChecksChan, nil, mockShouldAddStatsFunc) require.NotNil(t, err) - _, err = NewWorker(senderManager, 1, 2, pendingChecksChan, 
checksTracker, nil) + _, err = NewWorker(senderManager, haagentmock.NewMockHaAgent(), 1, 2, pendingChecksChan, checksTracker, nil) require.NotNil(t, err) - worker, err := NewWorker(senderManager, 1, 2, pendingChecksChan, checksTracker, mockShouldAddStatsFunc) + worker, err := NewWorker(senderManager, haagentmock.NewMockHaAgent(), 1, 2, pendingChecksChan, checksTracker, mockShouldAddStatsFunc) assert.Nil(t, err) assert.NotNil(t, worker) } @@ -150,7 +156,7 @@ func TestWorkerInitExpvarStats(t *testing.T) { go func(idx int) { defer wg.Done() - worker, err := NewWorker(aggregator.NewNoOpSenderManager(), 1, idx, pendingChecksChan, checksTracker, mockShouldAddStatsFunc) + worker, err := NewWorker(aggregator.NewNoOpSenderManager(), haagentmock.NewMockHaAgent(), 1, idx, pendingChecksChan, checksTracker, mockShouldAddStatsFunc) assert.Nil(t, err) worker.Run() @@ -172,7 +178,7 @@ func TestWorkerName(t *testing.T) { for _, id := range []int{1, 100, 500} { expectedName := fmt.Sprintf("worker_%d", id) - worker, err := NewWorker(aggregator.NewNoOpSenderManager(), 1, id, pendingChecksChan, checksTracker, mockShouldAddStatsFunc) + worker, err := NewWorker(aggregator.NewNoOpSenderManager(), haagentmock.NewMockHaAgent(), 1, id, pendingChecksChan, checksTracker, mockShouldAddStatsFunc) assert.Nil(t, err) assert.NotNil(t, worker) @@ -224,7 +230,7 @@ func TestWorker(t *testing.T) { pendingChecksChan <- testCheck1 close(pendingChecksChan) - worker, err := NewWorker(aggregator.NewNoOpSenderManager(), 100, 200, pendingChecksChan, checksTracker, mockShouldAddStatsFunc) + worker, err := NewWorker(aggregator.NewNoOpSenderManager(), haagentmock.NewMockHaAgent(), 100, 200, pendingChecksChan, checksTracker, mockShouldAddStatsFunc) require.Nil(t, err) wg.Add(1) @@ -284,6 +290,7 @@ func TestWorkerUtilizationExpvars(t *testing.T) { checksTracker, mockShouldAddStatsFunc, func() (sender.Sender, error) { return nil, nil }, + haagentmock.NewMockHaAgent(), 100*time.Millisecond, ) require.Nil(t, err) @@ 
-354,7 +361,7 @@ func TestWorkerErrorAndWarningHandling(t *testing.T) { } close(pendingChecksChan) - worker, err := NewWorker(aggregator.NewNoOpSenderManager(), 100, 200, pendingChecksChan, checksTracker, mockShouldAddStatsFunc) + worker, err := NewWorker(aggregator.NewNoOpSenderManager(), haagentmock.NewMockHaAgent(), 100, 200, pendingChecksChan, checksTracker, mockShouldAddStatsFunc) require.Nil(t, err) AssertAsyncWorkerCount(t, 0) @@ -399,7 +406,7 @@ func TestWorkerConcurrentCheckScheduling(t *testing.T) { pendingChecksChan <- testCheck close(pendingChecksChan) - worker, err := NewWorker(aggregator.NewNoOpSenderManager(), 100, 200, pendingChecksChan, checksTracker, mockShouldAddStatsFunc) + worker, err := NewWorker(aggregator.NewNoOpSenderManager(), haagentmock.NewMockHaAgent(), 100, 200, pendingChecksChan, checksTracker, mockShouldAddStatsFunc) require.Nil(t, err) worker.Run() @@ -453,7 +460,7 @@ func TestWorkerStatsAddition(t *testing.T) { pendingChecksChan <- squelchedStatsCheck close(pendingChecksChan) - worker, err := NewWorker(aggregator.NewNoOpSenderManager(), 100, 200, pendingChecksChan, checksTracker, shouldAddStatsFunc) + worker, err := NewWorker(aggregator.NewNoOpSenderManager(), haagentmock.NewMockHaAgent(), 100, 200, pendingChecksChan, checksTracker, shouldAddStatsFunc) require.Nil(t, err) worker.Run() @@ -505,6 +512,7 @@ func TestWorkerServiceCheckSending(t *testing.T) { func() (sender.Sender, error) { return mockSender, nil }, + haagentmock.NewMockHaAgent(), pollingInterval, ) require.Nil(t, err) @@ -575,6 +583,7 @@ func TestWorkerSenderNil(t *testing.T) { func() (sender.Sender, error) { return nil, fmt.Errorf("testerr") }, + haagentmock.NewMockHaAgent(), pollingInterval, ) require.Nil(t, err) @@ -615,6 +624,7 @@ func TestWorkerServiceCheckSendingLongRunningTasks(t *testing.T) { func() (sender.Sender, error) { return mockSender, nil }, + haagentmock.NewMockHaAgent(), pollingInterval, ) require.Nil(t, err) @@ -628,6 +638,98 @@ func 
TestWorkerServiceCheckSendingLongRunningTasks(t *testing.T) { mockSender.AssertNumberOfCalls(t, "ServiceCheck", 0) } +func TestWorker_HaIntegration(t *testing.T) { + testHostname := "myhost" + + tests := []struct { + name string + haAgentEnabled bool + setLeaderValue string + expectedSnmpCheckRunCount int + expectedUnknownCheckRunCount int + }{ + { + name: "ha-agent enabled and is leader", + // should run HA-integrations + // should run "non HA integrations" + haAgentEnabled: true, + setLeaderValue: testHostname, + expectedSnmpCheckRunCount: 1, + expectedUnknownCheckRunCount: 1, + }, + { + name: "ha-agent enabled and not leader", + // should skip HA-integrations + // should run "non HA integrations" + haAgentEnabled: true, + setLeaderValue: "leader-is-another-agent", + expectedSnmpCheckRunCount: 0, + expectedUnknownCheckRunCount: 1, + }, + { + name: "ha-agent disabled", + // When ha-agent is disabled, the agent behave as standalone agent (non HA) and will always run all integrations. + // should run all integrations + haAgentEnabled: false, + setLeaderValue: "", + expectedSnmpCheckRunCount: 1, + expectedUnknownCheckRunCount: 1, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + expvars.Reset() + + var wg sync.WaitGroup + + checksTracker := tracker.NewRunningChecksTracker() + pendingChecksChan := make(chan check.Check, 10) + mockShouldAddStatsFunc := func(checkid.ID) bool { return true } + + snmpCheck := newCheck(t, "snmp:123", false, nil) + unknownCheck := newCheck(t, "unknown-check:123", false, nil) + + pendingChecksChan <- snmpCheck + pendingChecksChan <- unknownCheck + close(pendingChecksChan) + + agentConfigs := map[string]interface{}{ + "hostname": testHostname, + "ha_agent.enabled": tt.haAgentEnabled, + "ha_agent.group": "my-group-01", + } + logComponent := logmock.New(t) + agentConfigComponent := fxutil.Test[config.Component](t, fx.Options( + config.MockModule(), + fx.Replace(config.MockParams{Overrides: agentConfigs}), + )) + 
requires := haagentimpl.Requires{ + Logger: logComponent, + AgentConfig: agentConfigComponent, + } + haagentcomp, _ := haagentimpl.NewComponent(requires) + haagentcomp.Comp.SetLeader(tt.setLeaderValue) + + worker, err := NewWorker(aggregator.NewNoOpSenderManager(), haagentcomp.Comp, 100, 200, pendingChecksChan, checksTracker, mockShouldAddStatsFunc) + require.Nil(t, err) + + wg.Add(1) + go func() { + defer wg.Done() + worker.Run() + }() + + wg.Wait() + + assert.Equal(t, tt.expectedSnmpCheckRunCount, snmpCheck.RunCount()) + assert.Equal(t, tt.expectedUnknownCheckRunCount, unknownCheck.RunCount()) + + // make sure the check is deleted from checksTracker + assert.Equal(t, 0, len(checksTracker.RunningChecks())) + }) + } +} + // getWorkerUtilizationExpvar returns the utilization as presented by expvars // for a named worker. func getWorkerUtilizationExpvar(t *testing.T, name string) float64 { diff --git a/pkg/config/model/types.go b/pkg/config/model/types.go index 9d22089e4d648..f97ec7efff5c5 100644 --- a/pkg/config/model/types.go +++ b/pkg/config/model/types.go @@ -82,6 +82,9 @@ type Reader interface { // OnUpdate adds a callback to the list receivers to be called each time a value is change in the configuration // by a call to the 'Set' method. The configuration will sequentially call each receiver. 
OnUpdate(callback NotificationReceiver) + + // Stringify stringifies the config + Stringify(source Source) string } // Writer is a subset of Config that only allows writing the configuration @@ -89,7 +92,6 @@ type Writer interface { Set(key string, value interface{}, source Source) SetWithoutSource(key string, value interface{}) UnsetForSource(key string, source Source) - CopyConfig(cfg Config) } // ReaderWriter is a subset of Config that allows reading and writing the configuration diff --git a/pkg/config/model/viper.go b/pkg/config/model/viper.go index 99964f052c2d6..4bac624a9270b 100644 --- a/pkg/config/model/viper.go +++ b/pkg/config/model/viper.go @@ -835,24 +835,9 @@ func NewConfig(name string, envPrefix string, envKeyReplacer *strings.Replacer) return &config } -// CopyConfig copies the given config to the receiver config. This should only be used in tests as replacing -// the global config reference is unsafe. -func (c *safeConfig) CopyConfig(cfg Config) { - c.Lock() - defer c.Unlock() - - if cfg, ok := cfg.(*safeConfig); ok { - c.Viper = cfg.Viper - c.configSources = cfg.configSources - c.envPrefix = cfg.envPrefix - c.envKeyReplacer = cfg.envKeyReplacer - c.proxies = cfg.proxies - c.configEnvVars = cfg.configEnvVars - c.unknownKeys = cfg.unknownKeys - c.notificationReceivers = cfg.notificationReceivers - return - } - panic("Replacement config must be an instance of safeConfig") +// Stringify stringifies the config, but only for nodetremodel with the test build tag +func (c *safeConfig) Stringify(_ Source) string { + return "safeConfig{...}" } // GetProxies returns the proxy settings from the configuration diff --git a/pkg/config/model/viper_test.go b/pkg/config/model/viper_test.go index acd73ca26503c..018b8d95ab804 100644 --- a/pkg/config/model/viper_test.go +++ b/pkg/config/model/viper_test.go @@ -249,26 +249,6 @@ func TestCheckKnownKey(t *testing.T) { assert.Contains(t, config.unknownKeys, "foobar") } -func TestCopyConfig(t *testing.T) { - config := 
NewConfig("test", "DD", strings.NewReplacer(".", "_")) // nolint: forbidigo - config.SetDefault("baz", "qux") - config.Set("foo", "bar", SourceFile) - config.BindEnv("xyz", "XXYYZZ") - config.SetKnown("tyu") - config.OnUpdate(func(_ string, _, _ any) {}) - - backup := NewConfig("test", "DD", strings.NewReplacer(".", "_")) // nolint: forbidigo - backup.CopyConfig(config) - - assert.Equal(t, "qux", backup.Get("baz")) - assert.Equal(t, "bar", backup.Get("foo")) - t.Setenv("XXYYZZ", "value") - assert.Equal(t, "value", backup.Get("xyz")) - assert.True(t, backup.IsKnown("tyu")) - // can't compare function pointers directly so just check the number of callbacks - assert.Len(t, backup.(*safeConfig).notificationReceivers, 1, "notification receivers should be copied") -} - func TestExtraConfig(t *testing.T) { config := NewConfig("test", "DD", strings.NewReplacer(".", "_")) // nolint: forbidigo diff --git a/pkg/config/nodetreemodel/config.go b/pkg/config/nodetreemodel/config.go index f59b6f7dbe324..14f727b6086a1 100644 --- a/pkg/config/nodetreemodel/config.go +++ b/pkg/config/nodetreemodel/config.go @@ -305,6 +305,18 @@ func (c *ntmConfig) BuildSchema() { } } +// Stringify stringifies the config, but only with the test build tag +func (c *ntmConfig) Stringify(source model.Source) string { + c.Lock() + defer c.Unlock() + // only does anything if the build tag "test" is enabled + text, err := c.toDebugString(source) + if err != nil { + return fmt.Sprintf("Stringify error: %s", err) + } + return text +} + func (c *ntmConfig) isReady() bool { return c.ready.Load() } @@ -482,14 +494,25 @@ func (c *ntmConfig) UnmarshalKey(key string, _rawVal interface{}, _opts ...viper c.RLock() defer c.RUnlock() c.checkKnownKey(key) - return c.logErrorNotImplemented("UnmarshalKey") + return fmt.Errorf("nodetreemodel.UnmarshalKey not available, use pkg/config/structure.UnmarshalKey instead") } // MergeConfig merges in another config -func (c *ntmConfig) MergeConfig(_in io.Reader) error { +func (c 
*ntmConfig) MergeConfig(in io.Reader) error { c.Lock() defer c.Unlock() - return c.logErrorNotImplemented("MergeConfig") + + content, err := io.ReadAll(in) + if err != nil { + return err + } + + other := newInnerNode(nil) + if err = c.readConfigurationContent(other, content); err != nil { + return err + } + + return c.root.Merge(other) } // MergeFleetPolicy merges the configuration from the reader given with an existing config @@ -619,11 +642,9 @@ func (c *ntmConfig) ConfigFileUsed() string { return c.configFile } -// SetTypeByDefaultValue enables typing using default values +// SetTypeByDefaultValue is a no-op func (c *ntmConfig) SetTypeByDefaultValue(_in bool) { - c.Lock() - defer c.Unlock() - c.logErrorNotImplemented("SetTypeByDefaultValue") + // do nothing: nodetreemodel always does this conversion } // BindEnvAndSetDefault binds an environment variable and sets a default for the given key @@ -661,7 +682,6 @@ func NewConfig(name string, envPrefix string, envKeyReplacer *strings.Replacer) envTransform: make(map[string]func(string) interface{}), } - config.SetTypeByDefaultValue(true) config.SetConfigName(name) config.SetEnvPrefix(envPrefix) config.SetEnvKeyReplacer(envKeyReplacer) @@ -669,26 +689,6 @@ func NewConfig(name string, envPrefix string, envKeyReplacer *strings.Replacer) return &config } -// CopyConfig copies the given config to the receiver config. This should only be used in tests as replacing -// the global config reference is unsafe. 
-func (c *ntmConfig) CopyConfig(cfg model.Config) { - c.Lock() - defer c.Unlock() - c.logErrorNotImplemented("CopyConfig") - if cfg, ok := cfg.(*ntmConfig); ok { - // TODO: Probably a bug, should be a deep copy, add a test and verify - c.root = cfg.root - c.envPrefix = cfg.envPrefix - c.envKeyReplacer = cfg.envKeyReplacer - c.proxies = cfg.proxies - c.configEnvVars = cfg.configEnvVars - c.unknownKeys = cfg.unknownKeys - c.notificationReceivers = cfg.notificationReceivers - return - } - panic("Replacement config must be an instance of ntmConfig") -} - // ExtraConfigFilesUsed returns the additional config files used func (c *ntmConfig) ExtraConfigFilesUsed() []string { c.Lock() diff --git a/pkg/config/nodetreemodel/config_test.go b/pkg/config/nodetreemodel/config_test.go index 5080841815605..c9f87747e2345 100644 --- a/pkg/config/nodetreemodel/config_test.go +++ b/pkg/config/nodetreemodel/config_test.go @@ -347,3 +347,81 @@ func TestAllKeysLowercased(t *testing.T) { sort.Strings(keys) assert.Equal(t, []string{"a", "b"}, keys) } + +func TestStringify(t *testing.T) { + configData := `network_path: + collector: + workers: 6 +secret_backend_command: ./my_secret_fetcher.sh +` + os.Setenv("TEST_SECRET_BACKEND_TIMEOUT", "60") + os.Setenv("TEST_NETWORK_PATH_COLLECTOR_INPUT_CHAN_SIZE", "23456") + + cfg := NewConfig("test", "TEST", strings.NewReplacer(".", "_")) + cfg.BindEnvAndSetDefault("network_path.collector.input_chan_size", 100000) + cfg.BindEnvAndSetDefault("network_path.collector.processing_chan_size", 100000) + cfg.BindEnvAndSetDefault("network_path.collector.workers", 4) + cfg.BindEnvAndSetDefault("secret_backend_command", "") + cfg.BindEnvAndSetDefault("secret_backend_timeout", 0) + cfg.BindEnvAndSetDefault("server_timeout", 30) + + cfg.BuildSchema() + err := cfg.ReadConfig(strings.NewReader(configData)) + require.NoError(t, err) + + txt := cfg.(*ntmConfig).Stringify("none") + expect := "Stringify error: invalid source: none" + assert.Equal(t, expect, txt) + + txt = 
cfg.(*ntmConfig).Stringify(model.SourceDefault) + expect = `network_path + collector + input_chan_size + val:100000, source:default + processing_chan_size + val:100000, source:default + workers + val:4, source:default +secret_backend_command + val:, source:default +secret_backend_timeout + val:0, source:default +server_timeout + val:30, source:default` + assert.Equal(t, expect, txt) + + txt = cfg.(*ntmConfig).Stringify(model.SourceFile) + expect = `network_path + collector + workers + val:6, source:file +secret_backend_command + val:./my_secret_fetcher.sh, source:file` + assert.Equal(t, expect, txt) + + txt = cfg.(*ntmConfig).Stringify(model.SourceEnvVar) + expect = `network_path + collector + input_chan_size + val:23456, source:environment-variable +secret_backend_timeout + val:60, source:environment-variable` + assert.Equal(t, expect, txt) + + txt = cfg.(*ntmConfig).Stringify("root") + expect = `network_path + collector + input_chan_size + val:23456, source:environment-variable + processing_chan_size + val:100000, source:default + workers + val:6, source:file +secret_backend_command + val:./my_secret_fetcher.sh, source:file +secret_backend_timeout + val:60, source:environment-variable +server_timeout + val:30, source:default` + assert.Equal(t, expect, txt) +} diff --git a/pkg/config/nodetreemodel/debug_string.go b/pkg/config/nodetreemodel/debug_string.go new file mode 100644 index 0000000000000..4d20a02800808 --- /dev/null +++ b/pkg/config/nodetreemodel/debug_string.go @@ -0,0 +1,16 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. 
+ +//go:build !test +// +build !test + +package nodetreemodel + +import "github.com/DataDog/datadog-agent/pkg/config/model" + +func (c *ntmConfig) toDebugString(_ model.Source) (string, error) { + // don't show any data outside of tests, that way we don't have to worry about scrubbing + return "nodeTreeModelConfig{...}", nil +} diff --git a/pkg/config/nodetreemodel/debug_string_testonly.go b/pkg/config/nodetreemodel/debug_string_testonly.go new file mode 100644 index 0000000000000..a3cf3ae41cd94 --- /dev/null +++ b/pkg/config/nodetreemodel/debug_string_testonly.go @@ -0,0 +1,65 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +//go:build test +// +build test + +package nodetreemodel + +import ( + "fmt" + "strings" + + "github.com/DataDog/datadog-agent/pkg/config/model" +) + +func (c *ntmConfig) toDebugString(source model.Source) (string, error) { + var node Node + switch source { + case "root": + node = c.root + case model.SourceEnvVar: + node = c.envs + case model.SourceDefault: + node = c.defaults + case model.SourceFile: + node = c.file + default: + return "", fmt.Errorf("invalid source: %s", source) + } + lines, err := debugTree(node, 0) + if err != nil { + return "", err + } + return strings.Join(lines, "\n"), nil +} + +func debugTree(n Node, depth int) ([]string, error) { + padding := strings.Repeat(" ", depth) + if n == nil { + return []string{fmt.Sprintf("%s%v", padding, "")}, nil + } + if leaf, ok := n.(LeafNode); ok { + val := leaf.Get() + source := leaf.Source() + return []string{fmt.Sprintf("%sval:%v, source:%s", padding, val, source)}, nil + } + inner, ok := n.(InnerNode) + if !ok { + return nil, fmt.Errorf("unknown node type: %T", n) + } + keys := inner.ChildrenKeys() + result := []string{} + for _, key := range keys { + msg := fmt.Sprintf("%s%s", 
padding, key) + child, _ := n.GetChild(key) + rest, err := debugTree(child, depth+1) + if err != nil { + return nil, err + } + result = append(result, append([]string{msg}, rest...)...) + } + return result, nil +} diff --git a/pkg/config/nodetreemodel/read_config_file.go b/pkg/config/nodetreemodel/read_config_file.go index 5a7204616f346..c829ff6837aeb 100644 --- a/pkg/config/nodetreemodel/read_config_file.go +++ b/pkg/config/nodetreemodel/read_config_file.go @@ -60,7 +60,7 @@ func (c *ntmConfig) ReadConfig(in io.Reader) error { if err != nil { return err } - if err := c.readConfigurationContent(content); err != nil { + if err := c.readConfigurationContent(c.file, content); err != nil { return err } return c.mergeAllLayers() @@ -71,15 +71,15 @@ func (c *ntmConfig) readInConfig(filePath string) error { if err != nil { return err } - return c.readConfigurationContent(content) + return c.readConfigurationContent(c.file, content) } -func (c *ntmConfig) readConfigurationContent(content []byte) error { +func (c *ntmConfig) readConfigurationContent(target InnerNode, content []byte) error { var obj map[string]interface{} if err := yaml.Unmarshal(content, &obj); err != nil { return err } - c.warnings = append(c.warnings, loadYamlInto(c.defaults, c.file, obj, "")...) + c.warnings = append(c.warnings, loadYamlInto(c.defaults, target, obj, "")...) 
return nil } diff --git a/pkg/config/setup/config.go b/pkg/config/setup/config.go index 5690289442f25..53327bb3f2908 100644 --- a/pkg/config/setup/config.go +++ b/pkg/config/setup/config.go @@ -879,6 +879,7 @@ func InitConfig(config pkgconfigmodel.Setup) { config.BindEnvAndSetDefault("otelcollector.extension_timeout", 0) // in seconds, 0 for default value config.BindEnvAndSetDefault("otelcollector.submit_dummy_metadata", false) // dev flag - to be removed config.BindEnvAndSetDefault("otelcollector.converter.enabled", true) + config.BindEnvAndSetDefault("otelcollector.flare.timeout", 60) // inventories config.BindEnvAndSetDefault("inventories_enabled", true) @@ -1121,6 +1122,10 @@ func agent(config pkgconfigmodel.Setup) { config.SetKnown("proxy.http") config.SetKnown("proxy.https") config.SetKnown("proxy.no_proxy") + + // Core agent (disabled for Error Tracking Standalone, Logs Collection Only) + config.BindEnvAndSetDefault("core_agent.enabled", true) + pkgconfigmodel.AddOverrideFunc(toggleDefaultPayloads) } func fleet(config pkgconfigmodel.Setup) { @@ -2384,6 +2389,17 @@ func sanitizeExternalMetricsProviderChunkSize(config pkgconfigmodel.Config) { } } +func toggleDefaultPayloads(config pkgconfigmodel.Config) { + // Disables metric data submission (including Custom Metrics) so that hosts stop showing up in Datadog. + // Used namely for Error Tracking Standalone where it is not needed. + if !config.GetBool("core_agent.enabled") { + config.Set("enable_payloads.events", false, pkgconfigmodel.SourceAgentRuntime) + config.Set("enable_payloads.series", false, pkgconfigmodel.SourceAgentRuntime) + config.Set("enable_payloads.service_checks", false, pkgconfigmodel.SourceAgentRuntime) + config.Set("enable_payloads.sketches", false, pkgconfigmodel.SourceAgentRuntime) + } +} + func bindEnvAndSetLogsConfigKeys(config pkgconfigmodel.Setup, prefix string) { config.BindEnv(prefix + "logs_dd_url") // Send the logs to a proxy. 
Must respect format ':' and '' to be an integer config.BindEnv(prefix + "dd_url") @@ -2595,7 +2611,7 @@ func GetRemoteConfigurationAllowedIntegrations(cfg pkgconfigmodel.Reader) map[st return allowMap } -// IsAgentTelemetryEnabled returns true if Agent Telemetry ise enabled +// IsAgentTelemetryEnabled returns true if Agent Telemetry is enabled func IsAgentTelemetryEnabled(cfg pkgconfigmodel.Reader) bool { // Disable Agent Telemetry for GovCloud if cfg.GetBool("fips.enabled") || cfg.GetString("site") == "ddog-gov.com" { diff --git a/pkg/config/setup/config_test.go b/pkg/config/setup/config_test.go index a6250a7156220..1421770758180 100644 --- a/pkg/config/setup/config_test.go +++ b/pkg/config/setup/config_test.go @@ -1423,6 +1423,29 @@ func TestServerlessConfigInit(t *testing.T) { assert.False(t, conf.IsKnown("inventories_enabled")) } +func TestDisableCoreAgent(t *testing.T) { + pkgconfigmodel.CleanOverride(t) + conf := pkgconfigmodel.NewConfig("datadog", "DD", strings.NewReplacer(".", "_")) // nolint: forbidigo // legit use case + pkgconfigmodel.AddOverrideFunc(toggleDefaultPayloads) + + InitConfig(conf) + assert.True(t, conf.GetBool("core_agent.enabled")) + pkgconfigmodel.ApplyOverrideFuncs(conf) + // ensure events default payloads are enabled + assert.True(t, conf.GetBool("enable_payloads.events")) + assert.True(t, conf.GetBool("enable_payloads.series")) + assert.True(t, conf.GetBool("enable_payloads.service_checks")) + assert.True(t, conf.GetBool("enable_payloads.sketches")) + + conf.BindEnvAndSetDefault("core_agent.enabled", false) + pkgconfigmodel.ApplyOverrideFuncs(conf) + // ensure events default payloads are disabled + assert.False(t, conf.GetBool("enable_payloads.events")) + assert.False(t, conf.GetBool("enable_payloads.series")) + assert.False(t, conf.GetBool("enable_payloads.service_checks")) + assert.False(t, conf.GetBool("enable_payloads.sketches")) +} + func TestAgentConfigInit(t *testing.T) { conf := newTestConf() diff --git 
a/pkg/config/setup/system_probe.go b/pkg/config/setup/system_probe.go index 9cb7580607062..a31d90efac8d8 100644 --- a/pkg/config/setup/system_probe.go +++ b/pkg/config/setup/system_probe.go @@ -249,7 +249,7 @@ func InitSystemProbeConfig(cfg pkgconfigmodel.Config) { cfg.BindEnvAndSetDefault(join(smNS, "enable_kafka_monitoring"), false) cfg.BindEnv(join(smNS, "enable_postgres_monitoring")) cfg.BindEnv(join(smNS, "enable_redis_monitoring")) - cfg.BindEnvAndSetDefault(join(smNS, "tls", "istio", "enabled"), false) + cfg.BindEnvAndSetDefault(join(smNS, "tls", "istio", "enabled"), true) cfg.BindEnvAndSetDefault(join(smNS, "tls", "istio", "envoy_path"), defaultEnvoyPath) cfg.BindEnv(join(smNS, "tls", "nodejs", "enabled")) cfg.BindEnvAndSetDefault(join(smjtNS, "enabled"), false) diff --git a/pkg/config/teeconfig/teeconfig.go b/pkg/config/teeconfig/teeconfig.go index 4101f43f4d5dd..f56190eca175b 100644 --- a/pkg/config/teeconfig/teeconfig.go +++ b/pkg/config/teeconfig/teeconfig.go @@ -364,11 +364,9 @@ func (t *teeConfig) Object() model.Reader { return t.baseline } -// CopyConfig copies the given config to the receiver config. This should only be used in tests as replacing -// the global config reference is unsafe. 
-func (t *teeConfig) CopyConfig(cfg model.Config) { - t.baseline.CopyConfig(cfg) - t.compare.CopyConfig(cfg) +// Stringify stringifies the config +func (t *teeConfig) Stringify(source model.Source) string { + return t.baseline.Stringify(source) } func (t *teeConfig) GetProxies() *model.Proxy { diff --git a/pkg/config/utils/miscellaneous.go b/pkg/config/utils/miscellaneous.go index 18f87b9ade392..faf3572d318d3 100644 --- a/pkg/config/utils/miscellaneous.go +++ b/pkg/config/utils/miscellaneous.go @@ -31,3 +31,29 @@ func SetLogLevel(level string, config pkgconfigmodel.Writer, source pkgconfigmod config.Set("log_level", seelogLogLevel, source) return nil } + +// IsCoreAgentEnabled checks whether the Agent is able to send the core payloads that it and other Agents need in order to function +func IsCoreAgentEnabled(cfg pkgconfigmodel.Reader) bool { + if !cfg.GetBool("core_agent.enabled") { + return false + } + + // core_agent.enabled can be true, but if all of the following payloads are + // disabled, the core agent is considered disabled + if !cfg.GetBool("enable_payloads.events") && + !cfg.GetBool("enable_payloads.series") && + !cfg.GetBool("enable_payloads.service_checks") && + !cfg.GetBool("enable_payloads.sketches") { + return false + } + + return true +} + +// IsAPMEnabled checks if APM is enabled or if Error Tracking standalone is enabled, simplifying the setup for +// Error Tracking standalone via the apm_config.error_tracking_standalone.enabled option alone, instead of +// also requiring apm_config.enabled to be enabled. 
+func IsAPMEnabled(cfg pkgconfigmodel.Reader) bool { + return (cfg.GetBool("apm_config.enabled") && IsCoreAgentEnabled(cfg)) || + cfg.GetBool("apm_config.error_tracking_standalone.enabled") +} diff --git a/pkg/config/utils/miscellaneous_test.go b/pkg/config/utils/miscellaneous_test.go new file mode 100644 index 0000000000000..96c65f5b33b9c --- /dev/null +++ b/pkg/config/utils/miscellaneous_test.go @@ -0,0 +1,66 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +package utils + +import ( + "github.com/stretchr/testify/assert" + "testing" + + configmock "github.com/DataDog/datadog-agent/pkg/config/mock" + "github.com/DataDog/datadog-agent/pkg/config/model" +) + +func TestIsCoreAgentEnabled(t *testing.T) { + + tests := []struct { + name string + expected bool + setConfig func(m model.Config) + }{ + { + name: "core_agent.enabled false", + expected: false, + setConfig: func(m model.Config) { + m.SetWithoutSource("core_agent.enabled", false) + }, + }, + { + name: "All enable_payloads.enabled false", + expected: false, + setConfig: func(m model.Config) { + m.SetWithoutSource("enable_payloads.events", false) + m.SetWithoutSource("enable_payloads.series", false) + m.SetWithoutSource("enable_payloads.service_checks", false) + m.SetWithoutSource("enable_payloads.sketches", false) + }, + }, + { + name: "Some enable_payloads.enabled false", + expected: true, + setConfig: func(m model.Config) { + m.SetWithoutSource("enable_payloads.events", false) + m.SetWithoutSource("enable_payloads.series", true) + m.SetWithoutSource("enable_payloads.service_checks", false) + m.SetWithoutSource("enable_payloads.sketches", true) + }, + }, + { + name: "default values", + expected: true, + setConfig: func(_ model.Config) {}, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t 
*testing.T) { + mockConfig := configmock.New(t) + test.setConfig(mockConfig) + assert.Equal(t, + test.expected, IsCoreAgentEnabled(mockConfig), + "Was expecting IsCoreAgentEnabled to return", test.expected) + }) + } +} diff --git a/pkg/config/utils/tags.go b/pkg/config/utils/tags.go index 5a5d3afa16199..36d6aef5c7aed 100644 --- a/pkg/config/utils/tags.go +++ b/pkg/config/utils/tags.go @@ -28,3 +28,17 @@ func GetConfiguredTags(c pkgconfigmodel.Reader, includeDogstatsd bool) []string return combined } + +// GetConfiguredDCATags returns the list of tags from a configuration, based on +// `cluster_checks.extra_tags` (DD_CLUSTER_CHECKS_EXTRA_TAGS) and +// `orchestrator_explorer.extra_tags` (DD_ORCHESTRATOR_EXPLORER_EXTRA_TAGS). +func GetConfiguredDCATags(c pkgconfigmodel.Reader) []string { + clusterCheckTags := c.GetStringSlice("cluster_checks.extra_tags") + orchestratorTags := c.GetStringSlice("orchestrator_explorer.extra_tags") + + combined := make([]string, 0, len(clusterCheckTags)+len(orchestratorTags)) + combined = append(combined, clusterCheckTags...) + combined = append(combined, orchestratorTags...) 
+ + return combined +} diff --git a/pkg/ebpf/mappings.go b/pkg/ebpf/mappings.go index 4f22298cc02cb..b7feb3f966195 100644 --- a/pkg/ebpf/mappings.go +++ b/pkg/ebpf/mappings.go @@ -38,6 +38,17 @@ func AddProgramNameMapping(progid uint32, name string, module string) { progModuleMapping[progid] = module } +// RemoveProgramID manually removes a program name mapping +func RemoveProgramID(progID uint32, expectedModule string) { + mappingLock.Lock() + defer mappingLock.Unlock() + + if progModuleMapping[progID] == expectedModule { + delete(progNameMapping, progID) + delete(progModuleMapping, progID) + } +} + // AddNameMappings adds the full name mappings for ebpf maps in the manager func AddNameMappings(mgr *manager.Manager, module string) { maps, err := mgr.GetMaps() diff --git a/pkg/ebpf/uprobes/attacher.go b/pkg/ebpf/uprobes/attacher.go index 903b69b59f620..f413462bea633 100644 --- a/pkg/ebpf/uprobes/attacher.go +++ b/pkg/ebpf/uprobes/attacher.go @@ -295,6 +295,9 @@ type FileRegistry interface { // GetRegisteredProcesses returns a map of all the processes that are currently registered in the registry GetRegisteredProcesses() map[uint32]struct{} + + // Log is a function that gets called periodically to log the state of the registry + Log() } // AttachCallback is a callback that is called whenever a probe is attached successfully @@ -489,6 +492,9 @@ func (ua *UprobeAttacher) Start() error { case <-processSync.C: // We always track process deletions in the scan, to avoid memory leaks. 
_ = ua.Sync(ua.config.EnablePeriodicScanNewProcesses, true) + + // Periodically log the state of the registry + ua.fileRegistry.Log() } } }() diff --git a/pkg/ebpf/uprobes/attacher_test.go b/pkg/ebpf/uprobes/attacher_test.go index 6003decb2ca42..55172da64b1bb 100644 --- a/pkg/ebpf/uprobes/attacher_test.go +++ b/pkg/ebpf/uprobes/attacher_test.go @@ -326,6 +326,7 @@ func TestMonitor(t *testing.T) { // Tell mockRegistry to return on any calls, we will check the values later mockRegistry.On("Clear").Return() + mockRegistry.On("Log").Return() mockRegistry.On("Unregister", mock.Anything).Return(nil) mockRegistry.On("Register", mock.Anything, mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(nil) lib := getLibSSLPath(t) @@ -872,6 +873,7 @@ func (s *SharedLibrarySuite) TestSingleFile() { // Tell mockRegistry to return on any calls, we will check the values later mockRegistry.On("Clear").Return() + mockRegistry.On("Log").Return() mockRegistry.On("Unregister", mock.Anything).Return(nil) mockRegistry.On("Register", mock.Anything, mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(nil) @@ -950,6 +952,7 @@ func (s *SharedLibrarySuite) TestDetectionWithPIDAndRootNamespace() { // Tell mockRegistry to return on any calls, we will check the values later mockRegistry.On("Clear").Return() + mockRegistry.On("Log").Return() mockRegistry.On("Unregister", mock.Anything).Return(nil) mockRegistry.On("Register", mock.Anything, mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(nil) diff --git a/pkg/ebpf/uprobes/testutil.go b/pkg/ebpf/uprobes/testutil.go index d5d27d4e5b2aa..dcda53b56d155 100644 --- a/pkg/ebpf/uprobes/testutil.go +++ b/pkg/ebpf/uprobes/testutil.go @@ -78,6 +78,11 @@ func (m *MockFileRegistry) GetRegisteredProcesses() map[uint32]struct{} { return args.Get(0).(map[uint32]struct{}) } +// Log is a mock implementation of the FileRegistry.Log method. 
+func (m *MockFileRegistry) Log() { + m.Called() +} + // MockBinaryInspector is a mock implementation of the BinaryInspector interface. type MockBinaryInspector struct { mock.Mock diff --git a/pkg/eventmonitor/eventmonitor.go b/pkg/eventmonitor/eventmonitor.go index e37ddebd55784..418a314a3ca12 100644 --- a/pkg/eventmonitor/eventmonitor.go +++ b/pkg/eventmonitor/eventmonitor.go @@ -19,7 +19,6 @@ import ( "google.golang.org/grpc" "github.com/DataDog/datadog-agent/cmd/system-probe/api/module" - "github.com/DataDog/datadog-agent/comp/core/telemetry" "github.com/DataDog/datadog-agent/pkg/eventmonitor/config" procstatsd "github.com/DataDog/datadog-agent/pkg/process/statsd" secconfig "github.com/DataDog/datadog-agent/pkg/security/config" @@ -224,7 +223,7 @@ func (m *EventMonitor) GetStats() map[string]interface{} { } // NewEventMonitor instantiates an event monitoring system-probe module -func NewEventMonitor(config *config.Config, secconfig *secconfig.Config, opts Opts, telemetry telemetry.Component) (*EventMonitor, error) { +func NewEventMonitor(config *config.Config, secconfig *secconfig.Config, opts Opts) (*EventMonitor, error) { if opts.StatsdClient == nil { opts.StatsdClient = procstatsd.Client } @@ -233,7 +232,7 @@ func NewEventMonitor(config *config.Config, secconfig *secconfig.Config, opts Op opts.ProbeOpts.StatsdClient = opts.StatsdClient } - probe, err := probe.NewProbe(secconfig, opts.ProbeOpts, telemetry) + probe, err := probe.NewProbe(secconfig, opts.ProbeOpts) if err != nil { return nil, err } diff --git a/pkg/eventmonitor/testutil/testutil.go b/pkg/eventmonitor/testutil/testutil.go index 83e1083ad2659..77f9e2772fad0 100644 --- a/pkg/eventmonitor/testutil/testutil.go +++ b/pkg/eventmonitor/testutil/testutil.go @@ -15,12 +15,9 @@ import ( "github.com/stretchr/testify/require" sysconfig "github.com/DataDog/datadog-agent/cmd/system-probe/config" - "github.com/DataDog/datadog-agent/comp/core/telemetry" - 
"github.com/DataDog/datadog-agent/comp/core/telemetry/telemetryimpl" "github.com/DataDog/datadog-agent/pkg/eventmonitor" emconfig "github.com/DataDog/datadog-agent/pkg/eventmonitor/config" secconfig "github.com/DataDog/datadog-agent/pkg/security/config" - "github.com/DataDog/datadog-agent/pkg/util/fxutil" ) // PreStartCallback is a callback to register clients to the event monitor before starting it @@ -39,8 +36,7 @@ func StartEventMonitor(t *testing.T, callback PreStartCallback) { require.NoError(t, os.MkdirAll("/opt/datadog-agent/run/", 0755)) opts := eventmonitor.Opts{} - telemetry := fxutil.Test[telemetry.Component](t, telemetryimpl.MockModule()) - evm, err := eventmonitor.NewEventMonitor(emconfig, secconfig, opts, telemetry) + evm, err := eventmonitor.NewEventMonitor(emconfig, secconfig, opts) require.NoError(t, err) require.NoError(t, evm.Init()) callback(t, evm) diff --git a/pkg/fleet/installer/repository/repositories.go b/pkg/fleet/installer/repository/repositories.go index b03364dab8198..b63714eb7acdc 100644 --- a/pkg/fleet/installer/repository/repositories.go +++ b/pkg/fleet/installer/repository/repositories.go @@ -58,8 +58,8 @@ func (r *Repositories) loadRepositories() (map[string]*Repository, error) { // Temporary dir created by Repositories.MkdirTemp, ignore continue } - if d.Name() == "run" { - // run dir, ignore + if d.Name() == "run" || d.Name() == "tmp" { + // run/tmp dir, ignore continue } repo := r.newRepository(d.Name()) diff --git a/pkg/fleet/installer/repository/repositories_test.go b/pkg/fleet/installer/repository/repositories_test.go index 00b38d2509542..ab66494b826ef 100644 --- a/pkg/fleet/installer/repository/repositories_test.go +++ b/pkg/fleet/installer/repository/repositories_test.go @@ -68,10 +68,14 @@ func TestLoadRepositories(t *testing.T) { os.Mkdir(path.Join(rootDir, "datadog-agent"), 0755) os.Mkdir(path.Join(rootDir, tempDirPrefix+"2394812349"), 0755) + os.Mkdir(path.Join(rootDir, "run"), 0755) + os.Mkdir(path.Join(rootDir, 
"tmp"), 0755) repositories, err := NewRepositories(rootDir, runDir).loadRepositories() assert.NoError(t, err) assert.Len(t, repositories, 1) assert.Contains(t, repositories, "datadog-agent") assert.NotContains(t, repositories, tempDirPrefix+"2394812349") + assert.NotContains(t, repositories, "run") + assert.NotContains(t, repositories, "tmp") } diff --git a/pkg/fleet/installer/service/datadog_installer.go b/pkg/fleet/installer/service/datadog_installer.go index ca41e32956916..722766a6a0659 100644 --- a/pkg/fleet/installer/service/datadog_installer.go +++ b/pkg/fleet/installer/service/datadog_installer.go @@ -108,13 +108,13 @@ func SetupInstaller(ctx context.Context) (err error) { } // Enforce that the directory exists. It should be created by the bootstrapper but // older versions don't do it - err = os.MkdirAll("/opt/datadog-installer/tmp", 0755) + err = os.MkdirAll("/opt/datadog-packages/tmp", 0755) if err != nil { - return fmt.Errorf("error creating /opt/datadog-installer/tmp: %w", err) + return fmt.Errorf("error creating /opt/datadog-packages/tmp: %w", err) } - err = os.Chown("/opt/datadog-installer/tmp", ddAgentUID, ddAgentGID) + err = os.Chown("/opt/datadog-packages/tmp", ddAgentUID, ddAgentGID) if err != nil { - return fmt.Errorf("error changing owner of /opt/datadog-installer/tmp: %w", err) + return fmt.Errorf("error changing owner of /opt/datadog-packages/tmp: %w", err) } // Create installer path symlink err = os.Symlink("/opt/datadog-packages/datadog-installer/stable/bin/installer/installer", "/usr/bin/datadog-installer") diff --git a/pkg/fleet/installer/service/embedded/datadog-agent-exp.service b/pkg/fleet/installer/service/embedded/datadog-agent-exp.service index 40adc828787db..dd6eb94c5f4ad 100644 --- a/pkg/fleet/installer/service/embedded/datadog-agent-exp.service +++ b/pkg/fleet/installer/service/embedded/datadog-agent-exp.service @@ -12,7 +12,7 @@ Type=oneshot PIDFile=/opt/datadog-packages/datadog-agent/experiment/run/agent.pid User=dd-agent 
EnvironmentFile=-/etc/datadog-agent/environment -Environment="DD_FLEET_POLICIES_DIR=/etc/datadog-packages/datadog-agent/experiment" +Environment="DD_FLEET_POLICIES_DIR=/etc/datadog-agent/managed/datadog-agent/experiment" ExecStart=/opt/datadog-packages/datadog-agent/experiment/bin/agent/agent run -p /opt/datadog-packages/datadog-agent/experiment/run/agent.pid ExecStart=/bin/false ExecStop=/bin/false diff --git a/pkg/fleet/installer/service/embedded/datadog-agent-process-exp.service b/pkg/fleet/installer/service/embedded/datadog-agent-process-exp.service index 9583865d902df..09f5bd3b826c4 100644 --- a/pkg/fleet/installer/service/embedded/datadog-agent-process-exp.service +++ b/pkg/fleet/installer/service/embedded/datadog-agent-process-exp.service @@ -9,7 +9,7 @@ PIDFile=/opt/datadog-packages/datadog-agent/experiment/run/process-agent.pid User=dd-agent Restart=on-failure EnvironmentFile=-/etc/datadog-agent/environment -Environment="DD_FLEET_POLICIES_DIR=/etc/datadog-packages/datadog-agent/experiment" +Environment="DD_FLEET_POLICIES_DIR=/etc/datadog-agent/managed/datadog-agent/experiment" ExecStart=/opt/datadog-packages/datadog-agent/experiment/embedded/bin/process-agent --cfgpath=/etc/datadog-agent/datadog.yaml --sysprobe-config=/etc/datadog-agent/system-probe.yaml --pid=/opt/datadog-packages/datadog-agent/experiment/run/process-agent.pid # Since systemd 229, should be in [Unit] but in order to support systemd <229, # it is also supported to have it here. 
diff --git a/pkg/fleet/installer/service/embedded/datadog-agent-process.service b/pkg/fleet/installer/service/embedded/datadog-agent-process.service index 23e892f1ef5da..dd0d336ba2c75 100644 --- a/pkg/fleet/installer/service/embedded/datadog-agent-process.service +++ b/pkg/fleet/installer/service/embedded/datadog-agent-process.service @@ -9,7 +9,7 @@ PIDFile=/opt/datadog-packages/datadog-agent/stable/run/process-agent.pid User=dd-agent Restart=on-failure EnvironmentFile=-/etc/datadog-agent/environment -Environment="DD_FLEET_POLICIES_DIR=/etc/datadog-packages/datadog-agent/stable" +Environment="DD_FLEET_POLICIES_DIR=/etc/datadog-agent/managed/datadog-agent/stable" ExecStart=/opt/datadog-packages/datadog-agent/stable/embedded/bin/process-agent --cfgpath=/etc/datadog-agent/datadog.yaml --sysprobe-config=/etc/datadog-agent/system-probe.yaml --pid=/opt/datadog-packages/datadog-agent/stable/run/process-agent.pid # Since systemd 229, should be in [Unit] but in order to support systemd <229, # it is also supported to have it here. 
diff --git a/pkg/fleet/installer/service/embedded/datadog-agent-security-exp.service b/pkg/fleet/installer/service/embedded/datadog-agent-security-exp.service index 8366d8c61c5de..b593ece47463d 100644 --- a/pkg/fleet/installer/service/embedded/datadog-agent-security-exp.service +++ b/pkg/fleet/installer/service/embedded/datadog-agent-security-exp.service @@ -3,14 +3,14 @@ Description=Datadog Security Agent Experiment After=network.target BindsTo=datadog-agent-exp.service ConditionPathExists=|/etc/datadog-agent/security-agent.yaml -ConditionPathExists=|/etc/datadog-packages/datadog-agent/experiment/security-agent.yaml +ConditionPathExists=|/etc/datadog-agent/managed/datadog-agent/experiment/security-agent.yaml [Service] Type=simple PIDFile=/opt/datadog-packages/datadog-agent/experiment/run/security-agent.pid Restart=on-failure EnvironmentFile=-/etc/datadog-agent/environment -Environment="DD_FLEET_POLICIES_DIR=/etc/datadog-packages/datadog-agent/experiment" +Environment="DD_FLEET_POLICIES_DIR=/etc/datadog-agent/managed/datadog-agent/experiment" ExecStart=/opt/datadog-packages/datadog-agent/experiment/embedded/bin/security-agent start -c /etc/datadog-agent/datadog.yaml --pidfile /opt/datadog-packages/datadog-agent/experiment/run/security-agent.pid # Since systemd 229, should be in [Unit] but in order to support systemd <229, # it is also supported to have it here. 
diff --git a/pkg/fleet/installer/service/embedded/datadog-agent-security.service b/pkg/fleet/installer/service/embedded/datadog-agent-security.service index f3cca2a2889bd..12c51f65446bc 100644 --- a/pkg/fleet/installer/service/embedded/datadog-agent-security.service +++ b/pkg/fleet/installer/service/embedded/datadog-agent-security.service @@ -3,14 +3,14 @@ Description=Datadog Security Agent After=network.target datadog-agent.service BindsTo=datadog-agent.service ConditionPathExists=|/etc/datadog-agent/security-agent.yaml -ConditionPathExists=|/etc/datadog-packages/datadog-agent/stable/security-agent.yaml +ConditionPathExists=|/etc/datadog-agent/managed/datadog-agent/stable/security-agent.yaml [Service] Type=simple PIDFile=/opt/datadog-packages/datadog-agent/stable/run/security-agent.pid Restart=on-failure EnvironmentFile=-/etc/datadog-agent/environment -Environment="DD_FLEET_POLICIES_DIR=/etc/datadog-packages/datadog-agent/stable" +Environment="DD_FLEET_POLICIES_DIR=/etc/datadog-agent/managed/datadog-agent/stable" ExecStart=/opt/datadog-packages/datadog-agent/stable/embedded/bin/security-agent start -c /etc/datadog-agent/datadog.yaml --pidfile /opt/datadog-packages/datadog-agent/stable/run/security-agent.pid # Since systemd 229, should be in [Unit] but in order to support systemd <229, # it is also supported to have it here. 
diff --git a/pkg/fleet/installer/service/embedded/datadog-agent-sysprobe-exp.service b/pkg/fleet/installer/service/embedded/datadog-agent-sysprobe-exp.service index 201f36f7615b4..a0844c974021f 100644 --- a/pkg/fleet/installer/service/embedded/datadog-agent-sysprobe-exp.service +++ b/pkg/fleet/installer/service/embedded/datadog-agent-sysprobe-exp.service @@ -4,13 +4,13 @@ Requires=sys-kernel-debug.mount After=network.target sys-kernel-debug.mount BindsTo=datadog-agent-exp.service ConditionPathExists=|/etc/datadog-agent/system-probe.yaml -ConditionPathExists=|/etc/datadog-packages/datadog-agent/experiment/system-probe.yaml +ConditionPathExists=|/etc/datadog-agent/managed/datadog-agent/experiment/system-probe.yaml [Service] Type=simple PIDFile=/opt/datadog-packages/datadog-agent/experiment/run/system-probe.pid Restart=on-failure -Environment="DD_FLEET_POLICIES_DIR=/etc/datadog-packages/datadog-agent/experiment" +Environment="DD_FLEET_POLICIES_DIR=/etc/datadog-agent/managed/datadog-agent/experiment" ExecStart=/opt/datadog-packages/datadog-agent/experiment/embedded/bin/system-probe run --config=/etc/datadog-agent/system-probe.yaml --pid=/opt/datadog-packages/datadog-agent/experiment/run/system-probe.pid # Since systemd 229, should be in [Unit] but in order to support systemd <229, # it is also supported to have it here. 
diff --git a/pkg/fleet/installer/service/embedded/datadog-agent-sysprobe.service b/pkg/fleet/installer/service/embedded/datadog-agent-sysprobe.service index 9afe7d60a49d7..7465b1fce1889 100644 --- a/pkg/fleet/installer/service/embedded/datadog-agent-sysprobe.service +++ b/pkg/fleet/installer/service/embedded/datadog-agent-sysprobe.service @@ -5,13 +5,13 @@ Before=datadog-agent.service After=network.target sys-kernel-debug.mount BindsTo=datadog-agent.service ConditionPathExists=|/etc/datadog-agent/system-probe.yaml -ConditionPathExists=|/etc/datadog-packages/datadog-agent/stable/system-probe.yaml +ConditionPathExists=|/etc/datadog-agent/managed/datadog-agent/stable/system-probe.yaml [Service] Type=simple PIDFile=/opt/datadog-packages/datadog-agent/stable/run/system-probe.pid Restart=on-failure -Environment="DD_FLEET_POLICIES_DIR=/etc/datadog-packages/datadog-agent/stable" +Environment="DD_FLEET_POLICIES_DIR=/etc/datadog-agent/managed/datadog-agent/stable" ExecStart=/opt/datadog-packages/datadog-agent/stable/embedded/bin/system-probe run --config=/etc/datadog-agent/system-probe.yaml --pid=/opt/datadog-packages/datadog-agent/stable/run/system-probe.pid # Since systemd 229, should be in [Unit] but in order to support systemd <229, # it is also supported to have it here. 
diff --git a/pkg/fleet/installer/service/embedded/datadog-agent-trace-exp.service b/pkg/fleet/installer/service/embedded/datadog-agent-trace-exp.service index 17bc119e85290..b05cf8dd9e86e 100644 --- a/pkg/fleet/installer/service/embedded/datadog-agent-trace-exp.service +++ b/pkg/fleet/installer/service/embedded/datadog-agent-trace-exp.service @@ -8,7 +8,7 @@ PIDFile=/opt/datadog-packages/datadog-agent/experiment/run/trace-agent.pid User=dd-agent Restart=on-failure EnvironmentFile=-/etc/datadog-agent/environment -Environment="DD_FLEET_POLICIES_DIR=/etc/datadog-packages/datadog-agent/experiment" +Environment="DD_FLEET_POLICIES_DIR=/etc/datadog-agent/managed/datadog-agent/experiment" ExecStart=/opt/datadog-packages/datadog-agent/experiment/embedded/bin/trace-agent --config /etc/datadog-agent/datadog.yaml --pidfile /opt/datadog-packages/datadog-agent/experiment/run/trace-agent.pid # Since systemd 229, should be in [Unit] but in order to support systemd <229, # it is also supported to have it here. diff --git a/pkg/fleet/installer/service/embedded/datadog-agent-trace.service b/pkg/fleet/installer/service/embedded/datadog-agent-trace.service index c3568e7004738..94ec7fc0a70d5 100644 --- a/pkg/fleet/installer/service/embedded/datadog-agent-trace.service +++ b/pkg/fleet/installer/service/embedded/datadog-agent-trace.service @@ -9,7 +9,7 @@ PIDFile=/opt/datadog-packages/datadog-agent/stable/run/trace-agent.pid User=dd-agent Restart=on-failure EnvironmentFile=-/etc/datadog-agent/environment -Environment="DD_FLEET_POLICIES_DIR=/etc/datadog-packages/datadog-agent/stable" +Environment="DD_FLEET_POLICIES_DIR=/etc/datadog-agent/managed/datadog-agent/stable" ExecStart=/opt/datadog-packages/datadog-agent/stable/embedded/bin/trace-agent --config /etc/datadog-agent/datadog.yaml --pidfile /opt/datadog-packages/datadog-agent/stable/run/trace-agent.pid # Since systemd 229, should be in [Unit] but in order to support systemd <229, # it is also supported to have it here. 
diff --git a/pkg/fleet/installer/service/embedded/datadog-agent.service b/pkg/fleet/installer/service/embedded/datadog-agent.service index 0f527777fc2e1..32ec1e68557c0 100644 --- a/pkg/fleet/installer/service/embedded/datadog-agent.service +++ b/pkg/fleet/installer/service/embedded/datadog-agent.service @@ -11,7 +11,7 @@ PIDFile=/opt/datadog-packages/datadog-agent/stable/run/agent.pid User=dd-agent Restart=on-failure EnvironmentFile=-/etc/datadog-agent/environment -Environment="DD_FLEET_POLICIES_DIR=/etc/datadog-packages/datadog-agent/stable" +Environment="DD_FLEET_POLICIES_DIR=/etc/datadog-agent/managed/datadog-agent/stable" ExecStart=/opt/datadog-packages/datadog-agent/stable/bin/agent/agent run -p /opt/datadog-packages/datadog-agent/stable/run/agent.pid # Since systemd 229, should be in [Unit] but in order to support systemd <229, # it is also supported to have it here. diff --git a/pkg/fleet/installer/setup.sh b/pkg/fleet/installer/setup.sh new file mode 100644 index 0000000000000..a3dee1323ce00 --- /dev/null +++ b/pkg/fleet/installer/setup.sh @@ -0,0 +1,47 @@ +#!/bin/bash +set -e +# This script is used to install Datadog. + +if [ "$(uname -s)" != "Linux" ] || { [ "$(uname -m)" != "x86_64" ] && [ "$(uname -m)" != "aarch64" ]; }; then + echo "This installer only supports linux running on amd64 or arm64." >&2 + exit 1 +fi + +installer_path="/opt/datadog-installer-bootstrap" + +install() { + if [ "$UID" == "0" ]; then + sudo_cmd='' + else + sudo_cmd='sudo' + fi + + case "$(uname -m)" in + x86_64) + echo "${installer_bin_linux_amd64}" | base64 -d | $sudo_cmd tee "${installer_path}" >/dev/null + ;; + aarch64) + echo "${installer_bin_linux_arm64}" | base64 -d | $sudo_cmd tee "${installer_path}" >/dev/null + ;; + esac + $sudo_cmd chmod +x "${installer_path}" + echo "Running the installer binary..." + $sudo_cmd "${installer_path}" "$@" + $sudo_cmd rm -f "${installer_path}" +} + +# Embedded installer binaries. 
+# Source: https://github.com/DataDog/datadog-agent/tree/INSTALLER_COMMIT/cmd/installer +installer_bin_linux_amd64=$( + cat < int(b.Cap) { - c.invalidEventsCount.Add(1) + if length < 0 { + c.negativeLengthEventCount.Add(1) + return + } + if length > int(b.Cap) { + c.lengthExceededEventCount.Add(1) return } diff --git a/pkg/network/protocols/events/consumer_test.go b/pkg/network/protocols/events/consumer_test.go index 54c1212a669f5..2b1bba26b97ad 100644 --- a/pkg/network/protocols/events/consumer_test.go +++ b/pkg/network/protocols/events/consumer_test.go @@ -18,10 +18,12 @@ import ( manager "github.com/DataDog/ebpf-manager" "github.com/cilium/ebpf" + "github.com/cilium/ebpf/ringbuf" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "golang.org/x/sys/unix" + ddebpf "github.com/DataDog/datadog-agent/pkg/ebpf" "github.com/DataDog/datadog-agent/pkg/ebpf/bytecode" "github.com/DataDog/datadog-agent/pkg/network/config" "github.com/DataDog/datadog-agent/pkg/util/kernel" @@ -169,3 +171,29 @@ func newEBPFProgram(c *config.Config) (*manager.Manager, error) { return m, nil } + +func TestInvalidBatchCountMetric(t *testing.T) { + kversion, err := kernel.HostVersion() + require.NoError(t, err) + if minVersion := kernel.VersionCode(4, 14, 0); kversion < minVersion { + t.Skipf("package not supported by kernels < %s", minVersion) + } + + program, err := newEBPFProgram(config.New()) + require.NoError(t, err) + + ringBufferHandler := ddebpf.NewRingBufferHandler(1) + ringBufferHandler.RecordHandler(&ringbuf.Record{ + RawSample: []byte("test"), + }, nil, nil) + + consumer, err := NewConsumer("test", program, func(_ []uint64) {}) + require.NoError(t, err) + consumer.handler = ringBufferHandler + + consumer.Start() + program.Stop(manager.CleanAll) + consumer.Stop() + + require.Equalf(t, int(consumer.invalidBatchCount.Get()), 1, "invalidBatchCount should be greater than 0") +} diff --git a/pkg/network/usm/kafka_monitor_test.go 
b/pkg/network/usm/kafka_monitor_test.go index c9fd9f5ca3776..52177ad873c24 100644 --- a/pkg/network/usm/kafka_monitor_test.go +++ b/pkg/network/usm/kafka_monitor_test.go @@ -1622,10 +1622,8 @@ func getDefaultTestConfiguration(tls bool) *config.Config { cfg := config.New() cfg.EnableKafkaMonitoring = true cfg.MaxTrackedConnections = 1000 - if tls { - cfg.EnableGoTLSSupport = true - cfg.GoTLSExcludeSelf = true - } + cfg.EnableGoTLSSupport = tls + cfg.GoTLSExcludeSelf = tls return cfg } @@ -1727,6 +1725,7 @@ func TestLoadKafkaBinary(t *testing.T) { func loadKafkaBinary(t *testing.T, debug bool) { cfg := config.New() // We don't have a way of enabling kafka without http at the moment + cfg.EnableGoTLSSupport = false cfg.EnableKafkaMonitoring = true cfg.MaxTrackedConnections = 1000 cfg.BPFDebug = debug diff --git a/pkg/network/usm/monitor_test.go b/pkg/network/usm/monitor_test.go index 595ce9593ac6f..014511d786ad6 100644 --- a/pkg/network/usm/monitor_test.go +++ b/pkg/network/usm/monitor_test.go @@ -81,7 +81,10 @@ func TestMonitorProtocolFail(t *testing.T) { patchProtocolMock(t, tt.spec) cfg := config.New() + cfg.EnableGoTLSSupport = false cfg.EnableHTTPMonitoring = true + cfg.EnableIstioMonitoring = false + monitor, err := NewMonitor(cfg, nil) skipIfNotSupported(t, err) require.NoError(t, err) diff --git a/pkg/network/usm/monitor_tls_test.go b/pkg/network/usm/monitor_tls_test.go index daadd6ca3e3a2..4a609e0ac9a30 100644 --- a/pkg/network/usm/monitor_tls_test.go +++ b/pkg/network/usm/monitor_tls_test.go @@ -69,6 +69,7 @@ func (s *tlsSuite) TestHTTPSViaLibraryIntegration() { t := s.T() cfg := config.New() + cfg.EnableGoTLSSupport = false cfg.EnableHTTPMonitoring = true cfg.EnableNativeTLSMonitoring = true /* enable protocol classification : TLS */ @@ -284,6 +285,7 @@ func (s *tlsSuite) TestOpenSSLVersions() { t := s.T() cfg := config.New() + cfg.EnableGoTLSSupport = false cfg.EnableNativeTLSMonitoring = true cfg.EnableHTTPMonitoring = true usmMonitor := 
setupUSMTLSMonitor(t, cfg) @@ -343,6 +345,7 @@ func (s *tlsSuite) TestOpenSSLVersionsSlowStart() { t := s.T() cfg := config.New() + cfg.EnableGoTLSSupport = false cfg.EnableNativeTLSMonitoring = true cfg.EnableHTTPMonitoring = true @@ -902,6 +905,7 @@ func (s *tlsSuite) TestNodeJSTLS() { require.NoError(t, err) cfg := config.New() + cfg.EnableGoTLSSupport = false cfg.EnableHTTPMonitoring = true cfg.EnableNodeJSMonitoring = true diff --git a/pkg/network/usm/tests/tracer_usm_linux_test.go b/pkg/network/usm/tests/tracer_usm_linux_test.go index a3186d6a8d883..551bf8e9650a7 100644 --- a/pkg/network/usm/tests/tracer_usm_linux_test.go +++ b/pkg/network/usm/tests/tracer_usm_linux_test.go @@ -299,6 +299,7 @@ func (s *USMSuite) TestIgnoreTLSClassificationIfApplicationProtocolWasDetected() t := s.T() cfg := tracertestutil.Config() cfg.ServiceMonitoringEnabled = true + cfg.EnableGoTLSSupport = false // USM cannot be enabled without a protocol. cfg.EnableHTTPMonitoring = true cfg.ProtocolClassificationEnabled = true diff --git a/pkg/network/usm/usm_grpc_monitor_test.go b/pkg/network/usm/usm_grpc_monitor_test.go index 2a931a2d5d707..f597935aeecad 100644 --- a/pkg/network/usm/usm_grpc_monitor_test.go +++ b/pkg/network/usm/usm_grpc_monitor_test.go @@ -106,6 +106,7 @@ func getGRPCClientsArray(t *testing.T, size int, withTLS bool) ([]*grpc.Client, func (s *usmGRPCSuite) getConfig() *config.Config { cfg := config.New() + cfg.EnableIstioMonitoring = false cfg.EnableHTTP2Monitoring = true cfg.EnableGoTLSSupport = s.isTLS cfg.GoTLSExcludeSelf = s.isTLS diff --git a/pkg/network/usm/usm_http2_monitor_test.go b/pkg/network/usm/usm_http2_monitor_test.go index 564bd1adced94..2e92c2647989b 100644 --- a/pkg/network/usm/usm_http2_monitor_test.go +++ b/pkg/network/usm/usm_http2_monitor_test.go @@ -79,6 +79,7 @@ type usmHTTP2Suite struct { func (s *usmHTTP2Suite) getCfg() *config.Config { cfg := config.New() + cfg.EnableIstioMonitoring = false cfg.EnableHTTP2Monitoring = true 
cfg.EnableGoTLSSupport = s.isTLS cfg.GoTLSExcludeSelf = s.isTLS @@ -1510,6 +1511,8 @@ func (s *usmHTTP2Suite) TestRawHuffmanEncoding() { func TestHTTP2InFlightMapCleaner(t *testing.T) { skipIfKernelNotSupported(t) cfg := config.New() + cfg.EnableGoTLSSupport = false + cfg.EnableIstioMonitoring = false cfg.EnableHTTP2Monitoring = true cfg.HTTP2DynamicTableMapCleanerInterval = 5 * time.Second cfg.HTTPIdleConnectionTTL = time.Second diff --git a/pkg/networkpath/traceroute/tcp/tcpv4.go b/pkg/networkpath/traceroute/tcp/tcpv4.go index 23f3c45950689..64484f9c0ad60 100644 --- a/pkg/networkpath/traceroute/tcp/tcpv4.go +++ b/pkg/networkpath/traceroute/tcp/tcpv4.go @@ -7,14 +7,9 @@ package tcp import ( - "fmt" - "math/rand" "net" "time" - "golang.org/x/net/ipv4" - - "github.com/DataDog/datadog-agent/pkg/util/log" "github.com/google/gopacket/layers" ) @@ -54,113 +49,6 @@ type ( } ) -// TracerouteSequential runs a traceroute sequentially where a packet is -// sent and we wait for a response before sending the next packet -func (t *TCPv4) TracerouteSequential() (*Results, error) { - // Get local address for the interface that connects to this - // host and store in in the probe - // - // TODO: do this once for the probe and hang on to the - // listener until we decide to close the probe - addr, err := localAddrForHost(t.Target, t.DestPort) - if err != nil { - return nil, fmt.Errorf("failed to get local address for target: %w", err) - } - t.srcIP = addr.IP - t.srcPort = addr.AddrPort().Port() - - // So far I haven't had success trying to simply create a socket - // using syscalls directly, but in theory doing so would allow us - // to avoid creating two listeners since we could see all IP traffic - // this way - // - // Create a raw ICMP listener to catch ICMP responses - icmpConn, err := net.ListenPacket("ip4:icmp", addr.IP.String()) - if err != nil { - return nil, fmt.Errorf("failed to create ICMP listener: %w", err) - } - defer icmpConn.Close() - // RawConn is necessary to set 
the TTL and ID fields - rawIcmpConn, err := ipv4.NewRawConn(icmpConn) - if err != nil { - return nil, fmt.Errorf("failed to get raw ICMP listener: %w", err) - } - - // Create a raw TCP listener to catch the TCP response from our final - // hop if we get one - tcpConn, err := net.ListenPacket("ip4:tcp", addr.IP.String()) - if err != nil { - return nil, fmt.Errorf("failed to create TCP listener: %w", err) - } - defer tcpConn.Close() - log.Tracef("Listening for TCP on: %s\n", addr.IP.String()+":"+addr.AddrPort().String()) - // RawConn is necessary to set the TTL and ID fields - rawTCPConn, err := ipv4.NewRawConn(tcpConn) - if err != nil { - return nil, fmt.Errorf("failed to get raw TCP listener: %w", err) - } - - // hops should be of length # of hops - hops := make([]*Hop, 0, t.MaxTTL-t.MinTTL) - - for i := int(t.MinTTL); i <= int(t.MaxTTL); i++ { - seqNumber := rand.Uint32() - hop, err := t.sendAndReceive(rawIcmpConn, rawTCPConn, i, seqNumber, t.Timeout) - if err != nil { - return nil, fmt.Errorf("failed to run traceroute: %w", err) - } - hops = append(hops, hop) - log.Tracef("Discovered hop: %+v", hop) - // if we've reached our destination, - // we're done - if hop.IsDest { - break - } - } - - return &Results{ - Source: t.srcIP, - SourcePort: t.srcPort, - Target: t.Target, - DstPort: t.DestPort, - Hops: hops, - }, nil -} - -func (t *TCPv4) sendAndReceive(rawIcmpConn *ipv4.RawConn, rawTCPConn *ipv4.RawConn, ttl int, seqNum uint32, timeout time.Duration) (*Hop, error) { - tcpHeader, tcpPacket, err := createRawTCPSyn(t.srcIP, t.srcPort, t.Target, t.DestPort, seqNum, ttl) - if err != nil { - log.Errorf("failed to create TCP packet with TTL: %d, error: %s", ttl, err.Error()) - return nil, err - } - - err = sendPacket(rawTCPConn, tcpHeader, tcpPacket) - if err != nil { - log.Errorf("failed to send TCP SYN: %s", err.Error()) - return nil, err - } - - start := time.Now() // TODO: is this the best place to start? 
- hopIP, hopPort, icmpType, end, err := listenPackets(rawIcmpConn, rawTCPConn, timeout, t.srcIP, t.srcPort, t.Target, t.DestPort, seqNum) - if err != nil { - log.Errorf("failed to listen for packets: %s", err.Error()) - return nil, err - } - - rtt := time.Duration(0) - if !hopIP.Equal(net.IP{}) { - rtt = end.Sub(start) - } - - return &Hop{ - IP: hopIP, - Port: hopPort, - ICMPType: icmpType, - RTT: rtt, - IsDest: hopIP.Equal(t.Target), - }, nil -} - // Close doesn't to anything yet, but we should // use this to close out long running sockets // when we're done with a path test diff --git a/pkg/networkpath/traceroute/tcp/tcpv4_unix.go b/pkg/networkpath/traceroute/tcp/tcpv4_unix.go new file mode 100644 index 0000000000000..32cf7e19ee11e --- /dev/null +++ b/pkg/networkpath/traceroute/tcp/tcpv4_unix.go @@ -0,0 +1,132 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. 
+ +//go:build unix + +// Package tcp adds a TCP traceroute implementation to the agent +package tcp + +import ( + "fmt" + "math/rand" + "net" + "time" + + "golang.org/x/net/ipv4" + + "github.com/DataDog/datadog-agent/pkg/util/log" +) + +// TracerouteSequential runs a traceroute sequentially where a packet is +// sent and we wait for a response before sending the next packet +func (t *TCPv4) TracerouteSequential() (*Results, error) { + // Get local address for the interface that connects to this + // host and store in in the probe + addr, err := localAddrForHost(t.Target, t.DestPort) + if err != nil { + return nil, fmt.Errorf("failed to get local address for target: %w", err) + } + t.srcIP = addr.IP + + // So far I haven't had success trying to simply create a socket + // using syscalls directly, but in theory doing so would allow us + // to avoid creating two listeners since we could see all IP traffic + // this way + // + // Create a raw ICMP listener to catch ICMP responses + icmpConn, err := net.ListenPacket("ip4:icmp", addr.IP.String()) + if err != nil { + return nil, fmt.Errorf("failed to create ICMP listener: %w", err) + } + defer icmpConn.Close() + // RawConn is necessary to set the TTL and ID fields + rawIcmpConn, err := ipv4.NewRawConn(icmpConn) + if err != nil { + return nil, fmt.Errorf("failed to get raw ICMP listener: %w", err) + } + + // Create a TCP listener with port 0 to get a random port from the OS + // and reserve it for the duration of the traceroute + port, tcpListener, err := reserveLocalPort() + if err != nil { + return nil, fmt.Errorf("failed to create TCP listener: %w", err) + } + defer tcpListener.Close() + t.srcPort = port + + // Create a raw TCP listener to catch the TCP response from our final + // hop if we get one + tcpConn, err := net.ListenPacket("ip4:tcp", addr.IP.String()) + if err != nil { + return nil, fmt.Errorf("failed to create TCP listener: %w", err) + } + defer tcpConn.Close() + log.Tracef("Listening for TCP on: %s\n", 
addr.IP.String()+":"+addr.AddrPort().String()) + // RawConn is necessary to set the TTL and ID fields + rawTCPConn, err := ipv4.NewRawConn(tcpConn) + if err != nil { + return nil, fmt.Errorf("failed to get raw TCP listener: %w", err) + } + + // hops should be of length # of hops + hops := make([]*Hop, 0, t.MaxTTL-t.MinTTL) + + for i := int(t.MinTTL); i <= int(t.MaxTTL); i++ { + seqNumber := rand.Uint32() + hop, err := t.sendAndReceive(rawIcmpConn, rawTCPConn, i, seqNumber, t.Timeout) + if err != nil { + return nil, fmt.Errorf("failed to run traceroute: %w", err) + } + hops = append(hops, hop) + log.Tracef("Discovered hop: %+v", hop) + // if we've reached our destination, + // we're done + if hop.IsDest { + break + } + } + + return &Results{ + Source: t.srcIP, + SourcePort: t.srcPort, + Target: t.Target, + DstPort: t.DestPort, + Hops: hops, + }, nil +} + +func (t *TCPv4) sendAndReceive(rawIcmpConn *ipv4.RawConn, rawTCPConn *ipv4.RawConn, ttl int, seqNum uint32, timeout time.Duration) (*Hop, error) { + tcpHeader, tcpPacket, err := createRawTCPSyn(t.srcIP, t.srcPort, t.Target, t.DestPort, seqNum, ttl) + if err != nil { + log.Errorf("failed to create TCP packet with TTL: %d, error: %s", ttl, err.Error()) + return nil, err + } + + err = sendPacket(rawTCPConn, tcpHeader, tcpPacket) + if err != nil { + log.Errorf("failed to send TCP SYN: %s", err.Error()) + return nil, err + } + + start := time.Now() // TODO: is this the best place to start? 
+ hopIP, hopPort, icmpType, end, err := listenPackets(rawIcmpConn, rawTCPConn, timeout, t.srcIP, t.srcPort, t.Target, t.DestPort, seqNum) + if err != nil { + log.Errorf("failed to listen for packets: %s", err.Error()) + return nil, err + } + + rtt := time.Duration(0) + if !hopIP.Equal(net.IP{}) { + rtt = end.Sub(start) + } + + return &Hop{ + IP: hopIP, + Port: hopPort, + ICMPType: icmpType, + RTT: rtt, + IsDest: hopIP.Equal(t.Target), + }, nil +} diff --git a/pkg/networkpath/traceroute/tcp/tcpv4_windows.go b/pkg/networkpath/traceroute/tcp/tcpv4_windows.go new file mode 100644 index 0000000000000..3067695b0e559 --- /dev/null +++ b/pkg/networkpath/traceroute/tcp/tcpv4_windows.go @@ -0,0 +1,148 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +// Package tcp adds a TCP traceroute implementation to the agent +package tcp + +import ( + "fmt" + "math/rand" + "net" + "time" + + "golang.org/x/sys/windows" + + "github.com/DataDog/datadog-agent/pkg/util/log" +) + +var ( + sendTo = windows.Sendto +) + +type winrawsocket struct { + s windows.Handle +} + +func (w *winrawsocket) close() { + if w.s != windows.InvalidHandle { + windows.Closesocket(w.s) // nolint: errcheck + } + w.s = windows.InvalidHandle +} + +func (t *TCPv4) sendRawPacket(w *winrawsocket, payload []byte) error { + + dst := t.Target.To4() + sa := &windows.SockaddrInet4{ + Port: int(t.DestPort), + Addr: [4]byte{dst[0], dst[1], dst[2], dst[3]}, + } + if err := sendTo(w.s, payload, 0, sa); err != nil { + return fmt.Errorf("failed to send packet: %w", err) + } + return nil +} + +func createRawSocket() (*winrawsocket, error) { + s, err := windows.Socket(windows.AF_INET, windows.SOCK_RAW, windows.IPPROTO_IP) + if err != nil { + return nil, fmt.Errorf("failed to create raw socket: %w", err) + } + on := int(1) + err = 
windows.SetsockoptInt(s, windows.IPPROTO_IP, windows.IP_HDRINCL, on) + if err != nil { + windows.Closesocket(s) // nolint: errcheck + return nil, fmt.Errorf("failed to set IP_HDRINCL: %w", err) + } + + err = windows.SetsockoptInt(s, windows.SOL_SOCKET, windows.SO_RCVTIMEO, 100) + if err != nil { + windows.Closesocket(s) // nolint: errcheck + return nil, fmt.Errorf("failed to set SO_RCVTIMEO: %w", err) + } + return &winrawsocket{s: s}, nil +} + +// TracerouteSequential runs a traceroute sequentially where a packet is +// sent and we wait for a response before sending the next packet +func (t *TCPv4) TracerouteSequential() (*Results, error) { + log.Debugf("Running traceroute to %+v", t) + // Get local address for the interface that connects to this + // host and store in in the probe + // + // TODO: do this once for the probe and hang on to the + // listener until we decide to close the probe + addr, err := localAddrForHost(t.Target, t.DestPort) + if err != nil { + return nil, fmt.Errorf("failed to get local address for target: %w", err) + } + t.srcIP = addr.IP + t.srcPort = addr.AddrPort().Port() + + rs, err := createRawSocket() + if err != nil { + return nil, fmt.Errorf("failed to create raw socket: %w", err) + } + defer rs.close() + + hops := make([]*Hop, 0, int(t.MaxTTL-t.MinTTL)+1) + + for i := int(t.MinTTL); i <= int(t.MaxTTL); i++ { + seqNumber := rand.Uint32() + hop, err := t.sendAndReceive(rs, i, seqNumber, t.Timeout) + if err != nil { + return nil, fmt.Errorf("failed to run traceroute: %w", err) + } + hops = append(hops, hop) + log.Tracef("Discovered hop: %+v", hop) + // if we've reached our destination, + // we're done + if hop.IsDest { + break + } + } + + return &Results{ + Source: t.srcIP, + SourcePort: t.srcPort, + Target: t.Target, + DstPort: t.DestPort, + Hops: hops, + }, nil +} + +func (t *TCPv4) sendAndReceive(rs *winrawsocket, ttl int, seqNum uint32, timeout time.Duration) (*Hop, error) { + _, buffer, _, err := createRawTCPSynBuffer(t.srcIP, 
t.srcPort, t.Target, t.DestPort, seqNum, ttl) + if err != nil { + log.Errorf("failed to create TCP packet with TTL: %d, error: %s", ttl, err.Error()) + return nil, err + } + + err = t.sendRawPacket(rs, buffer) + if err != nil { + log.Errorf("failed to send TCP packet: %s", err.Error()) + return nil, err + } + + start := time.Now() // TODO: is this the best place to start? + hopIP, hopPort, icmpType, end, err := rs.listenPackets(timeout, t.srcIP, t.srcPort, t.Target, t.DestPort, seqNum) + if err != nil { + log.Errorf("failed to listen for packets: %s", err.Error()) + return nil, err + } + + rtt := time.Duration(0) + if !hopIP.Equal(net.IP{}) { + rtt = end.Sub(start) + } + + return &Hop{ + IP: hopIP, + Port: hopPort, + ICMPType: icmpType, + RTT: rtt, + IsDest: hopIP.Equal(t.Target), + }, nil +} diff --git a/pkg/networkpath/traceroute/tcp/utils.go b/pkg/networkpath/traceroute/tcp/utils.go index be2ed9b6812c7..ae7d507f23940 100644 --- a/pkg/networkpath/traceroute/tcp/utils.go +++ b/pkg/networkpath/traceroute/tcp/utils.go @@ -6,17 +6,13 @@ package tcp import ( - "context" "fmt" "net" "strconv" - "sync" - "time" "github.com/DataDog/datadog-agent/pkg/util/log" "github.com/google/gopacket" "github.com/google/gopacket/layers" - "go.uber.org/multierr" "golang.org/x/net/ipv4" ) @@ -59,12 +55,6 @@ type ( DstIP net.IP TCPResponse layers.TCP } - - rawConnWrapper interface { - SetReadDeadline(t time.Time) error - ReadFrom(b []byte) (*ipv4.Header, []byte, *ipv4.ControlMessage, error) - WriteTo(h *ipv4.Header, p []byte, cm *ipv4.ControlMessage) error - } ) func localAddrForHost(destIP net.IP, destPort uint16) (*net.UDPAddr, error) { @@ -86,8 +76,33 @@ func localAddrForHost(destIP net.IP, destPort uint16) (*net.UDPAddr, error) { return localUDPAddr, nil } +// reserveLocalPort reserves an ephemeral TCP port +// and returns both the listener and port because the +// listener should be held until the port is no longer +// in use +func reserveLocalPort() (uint16, net.Listener, error) { 
+ // Create a TCP listener with port 0 to get a random port from the OS + // and reserve it for the duration of the traceroute + tcpListener, err := net.Listen("tcp", ":0") + if err != nil { + return 0, nil, fmt.Errorf("failed to create TCP listener: %w", err) + } + tcpAddr := tcpListener.Addr().(*net.TCPAddr) + + return uint16(tcpAddr.Port), tcpListener, nil +} + // createRawTCPSyn creates a TCP packet with the specified parameters func createRawTCPSyn(sourceIP net.IP, sourcePort uint16, destIP net.IP, destPort uint16, seqNum uint32, ttl int) (*ipv4.Header, []byte, error) { + ipHdr, packet, hdrlen, err := createRawTCPSynBuffer(sourceIP, sourcePort, destIP, destPort, seqNum, ttl) + if err != nil { + return nil, nil, err + } + + return ipHdr, packet[hdrlen:], nil +} + +func createRawTCPSynBuffer(sourceIP net.IP, sourcePort uint16, destIP net.IP, destPort uint16, seqNum uint32, ttl int) (*ipv4.Header, []byte, int, error) { ipLayer := &layers.IPv4{ Version: 4, Length: 20, @@ -109,7 +124,7 @@ func createRawTCPSyn(sourceIP net.IP, sourcePort uint16, destIP net.IP, destPort err := tcpLayer.SetNetworkLayerForChecksum(ipLayer) if err != nil { - return nil, nil, fmt.Errorf("failed to create packet checksum: %w", err) + return nil, nil, 0, fmt.Errorf("failed to create packet checksum: %w", err) } buf := gopacket.NewSerializeBuffer() opts := gopacket.SerializeOptions{FixLengths: true, ComputeChecksums: true} @@ -118,138 +133,16 @@ func createRawTCPSyn(sourceIP net.IP, sourcePort uint16, destIP net.IP, destPort tcpLayer, ) if err != nil { - return nil, nil, fmt.Errorf("failed to serialize packet: %w", err) + return nil, nil, 0, fmt.Errorf("failed to serialize packet: %w", err) } packet := buf.Bytes() var ipHdr ipv4.Header if err := ipHdr.Parse(packet[:20]); err != nil { - return nil, nil, fmt.Errorf("failed to parse IP header: %w", err) + return nil, nil, 0, fmt.Errorf("failed to parse IP header: %w", err) } - return &ipHdr, packet[20:], nil -} - -// sendPacket sends a raw 
IPv4 packet using the passed connection -func sendPacket(rawConn rawConnWrapper, header *ipv4.Header, payload []byte) error { - if err := rawConn.WriteTo(header, payload, nil); err != nil { - return err - } - - return nil -} - -// listenPackets takes in raw ICMP and TCP connections and listens for matching ICMP -// and TCP responses based on the passed in trace information. If neither listener -// receives a matching packet within the timeout, a blank response is returned. -// Once a matching packet is received by a listener, it will cause the other listener -// to be canceled, and data from the matching packet will be returned to the caller -func listenPackets(icmpConn rawConnWrapper, tcpConn rawConnWrapper, timeout time.Duration, localIP net.IP, localPort uint16, remoteIP net.IP, remotePort uint16, seqNum uint32) (net.IP, uint16, layers.ICMPv4TypeCode, time.Time, error) { - var tcpErr error - var icmpErr error - var wg sync.WaitGroup - var icmpIP net.IP - var tcpIP net.IP - var icmpCode layers.ICMPv4TypeCode - var tcpFinished time.Time - var icmpFinished time.Time - var port uint16 - wg.Add(2) - ctx, cancel := context.WithTimeout(context.Background(), timeout) - defer cancel() - go func() { - defer wg.Done() - defer cancel() - tcpIP, port, _, tcpFinished, tcpErr = handlePackets(ctx, tcpConn, "tcp", localIP, localPort, remoteIP, remotePort, seqNum) - }() - go func() { - defer wg.Done() - defer cancel() - icmpIP, _, icmpCode, icmpFinished, icmpErr = handlePackets(ctx, icmpConn, "icmp", localIP, localPort, remoteIP, remotePort, seqNum) - }() - wg.Wait() - - if tcpErr != nil && icmpErr != nil { - _, tcpCanceled := tcpErr.(canceledError) - _, icmpCanceled := icmpErr.(canceledError) - if icmpCanceled && tcpCanceled { - log.Trace("timed out waiting for responses") - return net.IP{}, 0, 0, time.Time{}, nil - } - if tcpErr != nil { - log.Errorf("TCP listener error: %s", tcpErr.Error()) - } - if icmpErr != nil { - log.Errorf("ICMP listener error: %s", icmpErr.Error()) - } 
- - return net.IP{}, 0, 0, time.Time{}, multierr.Append(fmt.Errorf("tcp error: %w", tcpErr), fmt.Errorf("icmp error: %w", icmpErr)) - } - - // if there was an error for TCP, but not - // ICMP, return the ICMP response - if tcpErr != nil { - return icmpIP, port, icmpCode, icmpFinished, nil - } - - // return the TCP response - return tcpIP, port, 0, tcpFinished, nil -} - -// handlePackets in its current implementation should listen for the first matching -// packet on the connection and then return. If no packet is received within the -// timeout or if the listener is canceled, it should return a canceledError -func handlePackets(ctx context.Context, conn rawConnWrapper, listener string, localIP net.IP, localPort uint16, remoteIP net.IP, remotePort uint16, seqNum uint32) (net.IP, uint16, layers.ICMPv4TypeCode, time.Time, error) { - buf := make([]byte, 1024) - tp := newTCPParser() - for { - select { - case <-ctx.Done(): - return net.IP{}, 0, 0, time.Time{}, canceledError("listener canceled") - default: - } - now := time.Now() - err := conn.SetReadDeadline(now.Add(time.Millisecond * 100)) - if err != nil { - return net.IP{}, 0, 0, time.Time{}, fmt.Errorf("failed to read: %w", err) - } - header, packet, _, err := conn.ReadFrom(buf) - if err != nil { - if nerr, ok := err.(*net.OpError); ok { - if nerr.Timeout() { - continue - } - } - return net.IP{}, 0, 0, time.Time{}, err - } - // once we have a packet, take a timestamp to know when - // the response was received, if it matches, we will - // return this timestamp - received := time.Now() - // TODO: remove listener constraint and parse all packets - // in the same function return a succinct struct here - if listener == "icmp" { - icmpResponse, err := parseICMP(header, packet) - if err != nil { - log.Tracef("failed to parse ICMP packet: %s", err) - continue - } - if icmpMatch(localIP, localPort, remoteIP, remotePort, seqNum, icmpResponse) { - return icmpResponse.SrcIP, 0, icmpResponse.TypeCode, received, nil - } - } else 
if listener == "tcp" { - tcpResp, err := tp.parseTCP(header, packet) - if err != nil { - log.Tracef("failed to parse TCP packet: %s", err) - continue - } - if tcpMatch(localIP, localPort, remoteIP, remotePort, seqNum, tcpResp) { - return tcpResp.SrcIP, uint16(tcpResp.TCPResponse.SrcPort), 0, received, nil - } - } else { - return net.IP{}, 0, 0, received, fmt.Errorf("unsupported listener type") - } - } + return &ipHdr, packet, 20, nil } // parseICMP takes in an IPv4 header and payload and tries to convert to an ICMP diff --git a/pkg/networkpath/traceroute/tcp/utils_test.go b/pkg/networkpath/traceroute/tcp/utils_test.go index b38d7fd5bc492..d79cda12ac5da 100644 --- a/pkg/networkpath/traceroute/tcp/utils_test.go +++ b/pkg/networkpath/traceroute/tcp/utils_test.go @@ -8,13 +8,11 @@ package tcp import ( - "context" - "errors" + "fmt" "net" "reflect" - "strings" + "runtime" "testing" - "time" "github.com/google/gopacket" "github.com/google/gopacket/layers" @@ -31,153 +29,88 @@ var ( innerDstIP = net.ParseIP("192.168.1.1") ) -type ( - mockRawConn struct { - setReadDeadlineErr error - readDeadline time.Time +func Test_reserveLocalPort(t *testing.T) { + // WHEN we reserve a local port + port, listener, err := reserveLocalPort() + require.NoError(t, err) + defer listener.Close() + require.NotNil(t, listener) + + // THEN we should not be able to get another connection + // on the same port + conn2, err := net.Listen("tcp", fmt.Sprintf("0.0.0.0:%d", port)) + assert.Error(t, err) + assert.Nil(t, conn2) +} + +func Test_createRawTCPSyn(t *testing.T) { + if runtime.GOOS == "darwin" { + t.Skip("Test_createRawTCPSyn is broken on macOS") + } - readTimeoutCount int - readFromErr error - header *ipv4.Header - payload []byte - cm *ipv4.ControlMessage + srcIP := net.ParseIP("1.2.3.4") + dstIP := net.ParseIP("5.6.7.8") + srcPort := uint16(12345) + dstPort := uint16(80) + seqNum := uint32(1000) + ttl := 64 - writeDelay time.Duration - writeToErr error + expectedIPHeader := &ipv4.Header{ + 
Version: 4, + TTL: ttl, + ID: 41821, + Protocol: 6, + Dst: dstIP, + Src: srcIP, + Len: 20, + TotalLen: 40, + Checksum: 51039, } - mockTimeoutErr string -) + expectedPktBytes := []byte{ + 0x30, 0x39, 0x0, 0x50, 0x0, 0x0, 0x3, 0xe8, 0x0, 0x0, 0x0, 0x0, 0x50, 0x2, 0x4, 0x0, 0x67, 0x5e, 0x0, 0x0, + } -func Test_handlePackets(t *testing.T) { - _, tcpBytes := createMockTCPPacket(createMockIPv4Header(dstIP, srcIP, 6), createMockTCPLayer(443, 12345, 28394, 28395, true, true, true)) + ipHeader, pktBytes, err := createRawTCPSyn(srcIP, srcPort, dstIP, dstPort, seqNum, ttl) + require.NoError(t, err) + assert.Equal(t, expectedIPHeader, ipHeader) + assert.Equal(t, expectedPktBytes, pktBytes) +} - tt := []struct { - description string - // input - ctxTimeout time.Duration - conn rawConnWrapper - listener string - localIP net.IP - localPort uint16 - remoteIP net.IP - remotePort uint16 - seqNum uint32 - // output - expectedIP net.IP - expectedPort uint16 - expectedTypeCode layers.ICMPv4TypeCode - errMsg string - }{ - { - description: "canceled context returns canceledErr", - ctxTimeout: 300 * time.Millisecond, - conn: &mockRawConn{ - readTimeoutCount: 100, - readFromErr: errors.New("bad test error"), - }, - errMsg: "canceled", - }, - { - description: "set timeout error returns an error", - ctxTimeout: 300 * time.Millisecond, - conn: &mockRawConn{ - setReadDeadlineErr: errors.New("good test error"), - readTimeoutCount: 100, - readFromErr: errors.New("bad error"), - }, - errMsg: "good test error", - }, - { - description: "non-timeout read error returns an error", - ctxTimeout: 1 * time.Second, - conn: &mockRawConn{ - readFromErr: errors.New("test read error"), - }, - errMsg: "test read error", - }, - { - description: "invalid listener returns unsupported listener", - ctxTimeout: 1 * time.Second, - conn: &mockRawConn{ - header: &ipv4.Header{}, - payload: nil, - }, - listener: "invalid", - errMsg: "unsupported", - }, - { - description: "failed ICMP parsing eventuallly returns cancel 
timeout", - ctxTimeout: 500 * time.Millisecond, - conn: &mockRawConn{ - header: &ipv4.Header{}, - payload: nil, - }, - listener: "icmp", - errMsg: "canceled", - }, - { - description: "failed TCP parsing eventuallly returns cancel timeout", - ctxTimeout: 500 * time.Millisecond, - conn: &mockRawConn{ - header: &ipv4.Header{}, - payload: nil, - }, - listener: "tcp", - errMsg: "canceled", - }, - { - description: "successful ICMP parsing returns IP, port, and type code", - ctxTimeout: 500 * time.Millisecond, - conn: &mockRawConn{ - header: createMockIPv4Header(srcIP, dstIP, 1), - payload: createMockICMPPacket(createMockICMPLayer(layers.ICMPv4CodeTTLExceeded), createMockIPv4Layer(innerSrcIP, innerDstIP, layers.IPProtocolTCP), createMockTCPLayer(12345, 443, 28394, 12737, true, true, true), false), - }, - localIP: innerSrcIP, - localPort: 12345, - remoteIP: innerDstIP, - remotePort: 443, - seqNum: 28394, - listener: "icmp", - expectedIP: srcIP, - expectedPort: 0, - expectedTypeCode: layers.ICMPv4CodeTTLExceeded, - }, - { - description: "successful TCP parsing returns IP, port, and type code", - ctxTimeout: 500 * time.Millisecond, - conn: &mockRawConn{ - header: createMockIPv4Header(dstIP, srcIP, 6), - payload: tcpBytes, - }, - localIP: srcIP, - localPort: 12345, - remoteIP: dstIP, - remotePort: 443, - seqNum: 28394, - listener: "tcp", - expectedIP: dstIP, - expectedPort: 443, - expectedTypeCode: 0, - }, +func Test_createRawTCPSynBuffer(t *testing.T) { + if runtime.GOOS == "darwin" { + t.Skip("Test_createRawTCPSyn is broken on macOS") } - for _, test := range tt { - t.Run(test.description, func(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), test.ctxTimeout) - defer cancel() - actualIP, actualPort, actualTypeCode, _, err := handlePackets(ctx, test.conn, test.listener, test.localIP, test.localPort, test.remoteIP, test.remotePort, test.seqNum) - if test.errMsg != "" { - require.Error(t, err) - assert.True(t, strings.Contains(err.Error(), 
test.errMsg)) - return - } - require.NoError(t, err) - assert.Truef(t, test.expectedIP.Equal(actualIP), "mismatch source IPs: expected %s, got %s", test.expectedIP.String(), actualIP.String()) - assert.Equal(t, test.expectedPort, actualPort) - assert.Equal(t, test.expectedTypeCode, actualTypeCode) - }) + srcIP := net.ParseIP("1.2.3.4") + dstIP := net.ParseIP("5.6.7.8") + srcPort := uint16(12345) + dstPort := uint16(80) + seqNum := uint32(1000) + ttl := 64 + + expectedIPHeader := &ipv4.Header{ + Version: 4, + TTL: ttl, + ID: 41821, + Protocol: 6, + Dst: dstIP, + Src: srcIP, + Len: 20, + TotalLen: 40, + Checksum: 51039, + } + + expectedPktBytes := []byte{ + 0x45, 0x0, 0x0, 0x28, 0xa3, 0x5d, 0x0, 0x0, 0x40, 0x6, 0xc7, 0x5f, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x30, 0x39, 0x0, 0x50, 0x0, 0x0, 0x3, 0xe8, 0x0, 0x0, 0x0, 0x0, 0x50, 0x2, 0x4, 0x0, 0x67, 0x5e, 0x0, 0x0, } + + ipHeader, pktBytes, headerLength, err := createRawTCPSynBuffer(srcIP, srcPort, dstIP, dstPort, seqNum, ttl) + + require.NoError(t, err) + assert.Equal(t, expectedIPHeader, ipHeader) + assert.Equal(t, 20, headerLength) + assert.Equal(t, expectedPktBytes, pktBytes) } func Test_parseICMP(t *testing.T) { @@ -210,14 +143,14 @@ func Test_parseICMP(t *testing.T) { { description: "missing inner layers should return an error", inHeader: ipv4Header, - inPayload: createMockICMPPacket(icmpLayer, nil, nil, false), + inPayload: createMockICMPPacket(nil, icmpLayer, nil, nil, false), expected: nil, errMsg: "failed to decode inner ICMP payload", }, { description: "ICMP packet with partial TCP header should create icmpResponse", inHeader: ipv4Header, - inPayload: createMockICMPPacket(icmpLayer, innerIPv4Layer, innerTCPLayer, true), + inPayload: createMockICMPPacket(nil, icmpLayer, innerIPv4Layer, innerTCPLayer, true), expected: &icmpResponse{ SrcIP: srcIP, DstIP: dstIP, @@ -232,7 +165,7 @@ func Test_parseICMP(t *testing.T) { { description: "full ICMP packet should create icmpResponse", inHeader: ipv4Header, - 
inPayload: createMockICMPPacket(icmpLayer, innerIPv4Layer, innerTCPLayer, true), + inPayload: createMockICMPPacket(nil, icmpLayer, innerIPv4Layer, innerTCPLayer, true), expected: &icmpResponse{ SrcIP: srcIP, DstIP: dstIP, @@ -275,7 +208,7 @@ func Test_parseTCP(t *testing.T) { tcpLayer := createMockTCPLayer(12345, 443, 28394, 12737, true, true, true) // full packet - encodedTCPLayer, fullTCPPacket := createMockTCPPacket(ipv4Header, tcpLayer) + encodedTCPLayer, fullTCPPacket := createMockTCPPacket(ipv4Header, tcpLayer, false) tt := []struct { description string @@ -337,7 +270,7 @@ func BenchmarkParseTCP(b *testing.B) { tcpLayer := createMockTCPLayer(12345, 443, 28394, 12737, true, true, true) // full packet - _, fullTCPPacket := createMockTCPPacket(ipv4Header, tcpLayer) + _, fullTCPPacket := createMockTCPPacket(ipv4Header, tcpLayer, false) tp := newTCPParser() @@ -350,40 +283,6 @@ func BenchmarkParseTCP(b *testing.B) { } } -func (m *mockRawConn) SetReadDeadline(t time.Time) error { - if m.setReadDeadlineErr != nil { - return m.setReadDeadlineErr - } - m.readDeadline = t - - return nil -} -func (m *mockRawConn) ReadFrom(_ []byte) (*ipv4.Header, []byte, *ipv4.ControlMessage, error) { - if m.readTimeoutCount > 0 { - m.readTimeoutCount-- - time.Sleep(time.Until(m.readDeadline)) - return nil, nil, nil, &net.OpError{Err: mockTimeoutErr("test timeout error")} - } - if m.readFromErr != nil { - return nil, nil, nil, m.readFromErr - } - - return m.header, m.payload, m.cm, nil -} - -func (m *mockRawConn) WriteTo(_ *ipv4.Header, _ []byte, _ *ipv4.ControlMessage) error { - time.Sleep(m.writeDelay) - return m.writeToErr -} - -func (me mockTimeoutErr) Error() string { - return string(me) -} - -func (me mockTimeoutErr) Timeout() bool { - return true -} - func createMockIPv4Header(srcIP, dstIP net.IP, protocol int) *ipv4.Header { return &ipv4.Header{ Version: 4, @@ -395,7 +294,7 @@ func createMockIPv4Header(srcIP, dstIP net.IP, protocol int) *ipv4.Header { } } -func 
createMockICMPPacket(icmpLayer *layers.ICMPv4, innerIP *layers.IPv4, innerTCP *layers.TCP, partialTCPHeader bool) []byte { +func createMockICMPPacket(ipLayer *layers.IPv4, icmpLayer *layers.ICMPv4, innerIP *layers.IPv4, innerTCP *layers.TCP, partialTCPHeader bool) []byte { innerBuf := gopacket.NewSerializeBuffer() opts := gopacket.SerializeOptions{FixLengths: true, ComputeChecksums: true} @@ -428,10 +327,21 @@ func createMockICMPPacket(icmpLayer *layers.ICMPv4, innerIP *layers.IPv4, innerT gopacket.Payload(payload), ) + icmpBytes := buf.Bytes() + if ipLayer == nil { + return icmpBytes + } + + buf = gopacket.NewSerializeBuffer() + gopacket.SerializeLayers(buf, opts, + ipLayer, + gopacket.Payload(icmpBytes), + ) + return buf.Bytes() } -func createMockTCPPacket(ipHeader *ipv4.Header, tcpLayer *layers.TCP) (*layers.TCP, []byte) { +func createMockTCPPacket(ipHeader *ipv4.Header, tcpLayer *layers.TCP, includeHeader bool) (*layers.TCP, []byte) { ipLayer := &layers.IPv4{ Version: 4, SrcIP: ipHeader.Src, @@ -443,9 +353,16 @@ func createMockTCPPacket(ipHeader *ipv4.Header, tcpLayer *layers.TCP) (*layers.T tcpLayer.SetNetworkLayerForChecksum(ipLayer) buf := gopacket.NewSerializeBuffer() opts := gopacket.SerializeOptions{FixLengths: true, ComputeChecksums: true} - gopacket.SerializeLayers(buf, opts, - tcpLayer, - ) + if includeHeader { + gopacket.SerializeLayers(buf, opts, + ipLayer, + tcpLayer, + ) + } else { + gopacket.SerializeLayers(buf, opts, + tcpLayer, + ) + } pkt := gopacket.NewPacket(buf.Bytes(), layers.LayerTypeTCP, gopacket.Default) diff --git a/pkg/networkpath/traceroute/tcp/utils_unix.go b/pkg/networkpath/traceroute/tcp/utils_unix.go new file mode 100644 index 0000000000000..2a52e5f8bea88 --- /dev/null +++ b/pkg/networkpath/traceroute/tcp/utils_unix.go @@ -0,0 +1,151 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. 
+// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +//go:build unix + +package tcp + +import ( + "context" + "fmt" + "net" + "sync" + "time" + + "github.com/DataDog/datadog-agent/pkg/util/log" + "github.com/google/gopacket/layers" + "go.uber.org/multierr" + "golang.org/x/net/ipv4" +) + +type ( + rawConnWrapper interface { + SetReadDeadline(t time.Time) error + ReadFrom(b []byte) (*ipv4.Header, []byte, *ipv4.ControlMessage, error) + WriteTo(h *ipv4.Header, p []byte, cm *ipv4.ControlMessage) error + } +) + +// sendPacket sends a raw IPv4 packet using the passed connection +func sendPacket(rawConn rawConnWrapper, header *ipv4.Header, payload []byte) error { + if err := rawConn.WriteTo(header, payload, nil); err != nil { + return err + } + + return nil +} + +// listenPackets takes in raw ICMP and TCP connections and listens for matching ICMP +// and TCP responses based on the passed in trace information. If neither listener +// receives a matching packet within the timeout, a blank response is returned. 
+// Once a matching packet is received by a listener, it will cause the other listener +// to be canceled, and data from the matching packet will be returned to the caller +func listenPackets(icmpConn rawConnWrapper, tcpConn rawConnWrapper, timeout time.Duration, localIP net.IP, localPort uint16, remoteIP net.IP, remotePort uint16, seqNum uint32) (net.IP, uint16, layers.ICMPv4TypeCode, time.Time, error) { + var tcpErr error + var icmpErr error + var wg sync.WaitGroup + var icmpIP net.IP + var tcpIP net.IP + var icmpCode layers.ICMPv4TypeCode + var tcpFinished time.Time + var icmpFinished time.Time + var port uint16 + wg.Add(2) + ctx, cancel := context.WithTimeout(context.Background(), timeout) + defer cancel() + go func() { + defer wg.Done() + defer cancel() + tcpIP, port, _, tcpFinished, tcpErr = handlePackets(ctx, tcpConn, "tcp", localIP, localPort, remoteIP, remotePort, seqNum) + }() + go func() { + defer wg.Done() + defer cancel() + icmpIP, _, icmpCode, icmpFinished, icmpErr = handlePackets(ctx, icmpConn, "icmp", localIP, localPort, remoteIP, remotePort, seqNum) + }() + wg.Wait() + + if tcpErr != nil && icmpErr != nil { + _, tcpCanceled := tcpErr.(canceledError) + _, icmpCanceled := icmpErr.(canceledError) + if icmpCanceled && tcpCanceled { + log.Trace("timed out waiting for responses") + return net.IP{}, 0, 0, time.Time{}, nil + } + if tcpErr != nil { + log.Errorf("TCP listener error: %s", tcpErr.Error()) + } + if icmpErr != nil { + log.Errorf("ICMP listener error: %s", icmpErr.Error()) + } + + return net.IP{}, 0, 0, time.Time{}, multierr.Append(fmt.Errorf("tcp error: %w", tcpErr), fmt.Errorf("icmp error: %w", icmpErr)) + } + + // if there was an error for TCP, but not + // ICMP, return the ICMP response + if tcpErr != nil { + return icmpIP, port, icmpCode, icmpFinished, nil + } + + // return the TCP response + return tcpIP, port, 0, tcpFinished, nil +} + +// handlePackets in its current implementation should listen for the first matching +// packet on the 
connection and then return. If no packet is received within the +// timeout or if the listener is canceled, it should return a canceledError +func handlePackets(ctx context.Context, conn rawConnWrapper, listener string, localIP net.IP, localPort uint16, remoteIP net.IP, remotePort uint16, seqNum uint32) (net.IP, uint16, layers.ICMPv4TypeCode, time.Time, error) { + buf := make([]byte, 1024) + tp := newTCPParser() + for { + select { + case <-ctx.Done(): + return net.IP{}, 0, 0, time.Time{}, canceledError("listener canceled") + default: + } + now := time.Now() + err := conn.SetReadDeadline(now.Add(time.Millisecond * 100)) + if err != nil { + return net.IP{}, 0, 0, time.Time{}, fmt.Errorf("failed to read: %w", err) + } + header, packet, _, err := conn.ReadFrom(buf) + if err != nil { + if nerr, ok := err.(*net.OpError); ok { + if nerr.Timeout() { + continue + } + } + return net.IP{}, 0, 0, time.Time{}, err + } + // once we have a packet, take a timestamp to know when + // the response was received, if it matches, we will + // return this timestamp + received := time.Now() + // TODO: remove listener constraint and parse all packets + // in the same function return a succinct struct here + if listener == "icmp" { + icmpResponse, err := parseICMP(header, packet) + if err != nil { + log.Tracef("failed to parse ICMP packet: %s", err) + continue + } + if icmpMatch(localIP, localPort, remoteIP, remotePort, seqNum, icmpResponse) { + return icmpResponse.SrcIP, 0, icmpResponse.TypeCode, received, nil + } + } else if listener == "tcp" { + tcpResp, err := tp.parseTCP(header, packet) + if err != nil { + log.Tracef("failed to parse TCP packet: %s", err) + continue + } + if tcpMatch(localIP, localPort, remoteIP, remotePort, seqNum, tcpResp) { + return tcpResp.SrcIP, uint16(tcpResp.TCPResponse.SrcPort), 0, received, nil + } + } else { + return net.IP{}, 0, 0, received, fmt.Errorf("unsupported listener type") + } + } +} diff --git a/pkg/networkpath/traceroute/tcp/utils_unix_test.go 
b/pkg/networkpath/traceroute/tcp/utils_unix_test.go new file mode 100644 index 0000000000000..731f5affe1380 --- /dev/null +++ b/pkg/networkpath/traceroute/tcp/utils_unix_test.go @@ -0,0 +1,206 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +//go:build test && unix + +package tcp + +import ( + "context" + "errors" + "net" + "strings" + "testing" + "time" + + "github.com/google/gopacket/layers" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "golang.org/x/net/ipv4" +) + +type ( + mockRawConn struct { + setReadDeadlineErr error + readDeadline time.Time + + readTimeoutCount int + readFromErr error + header *ipv4.Header + payload []byte + cm *ipv4.ControlMessage + + writeDelay time.Duration + writeToErr error + } + + mockTimeoutErr string +) + +func Test_handlePackets(t *testing.T) { + _, tcpBytes := createMockTCPPacket(createMockIPv4Header(dstIP, srcIP, 6), createMockTCPLayer(443, 12345, 28394, 28395, true, true, true), false) + + tt := []struct { + description string + // input + ctxTimeout time.Duration + conn rawConnWrapper + listener string + localIP net.IP + localPort uint16 + remoteIP net.IP + remotePort uint16 + seqNum uint32 + // output + expectedIP net.IP + expectedPort uint16 + expectedTypeCode layers.ICMPv4TypeCode + errMsg string + }{ + { + description: "canceled context returns canceledErr", + ctxTimeout: 300 * time.Millisecond, + conn: &mockRawConn{ + readTimeoutCount: 100, + readFromErr: errors.New("bad test error"), + }, + errMsg: "canceled", + }, + { + description: "set timeout error returns an error", + ctxTimeout: 300 * time.Millisecond, + conn: &mockRawConn{ + setReadDeadlineErr: errors.New("good test error"), + readTimeoutCount: 100, + readFromErr: errors.New("bad error"), + }, + errMsg: "good test error", + }, 
+ { + description: "non-timeout read error returns an error", + ctxTimeout: 1 * time.Second, + conn: &mockRawConn{ + readFromErr: errors.New("test read error"), + }, + errMsg: "test read error", + }, + { + description: "invalid listener returns unsupported listener", + ctxTimeout: 1 * time.Second, + conn: &mockRawConn{ + header: &ipv4.Header{}, + payload: nil, + }, + listener: "invalid", + errMsg: "unsupported", + }, + { + description: "failed ICMP parsing eventuallly returns cancel timeout", + ctxTimeout: 500 * time.Millisecond, + conn: &mockRawConn{ + header: &ipv4.Header{}, + payload: nil, + }, + listener: "icmp", + errMsg: "canceled", + }, + { + description: "failed TCP parsing eventuallly returns cancel timeout", + ctxTimeout: 500 * time.Millisecond, + conn: &mockRawConn{ + header: &ipv4.Header{}, + payload: nil, + }, + listener: "tcp", + errMsg: "canceled", + }, + { + description: "successful ICMP parsing returns IP, port, and type code", + ctxTimeout: 500 * time.Millisecond, + conn: &mockRawConn{ + header: createMockIPv4Header(srcIP, dstIP, 1), + payload: createMockICMPPacket(nil, createMockICMPLayer(layers.ICMPv4CodeTTLExceeded), createMockIPv4Layer(innerSrcIP, innerDstIP, layers.IPProtocolTCP), createMockTCPLayer(12345, 443, 28394, 12737, true, true, true), false), + }, + localIP: innerSrcIP, + localPort: 12345, + remoteIP: innerDstIP, + remotePort: 443, + seqNum: 28394, + listener: "icmp", + expectedIP: srcIP, + expectedPort: 0, + expectedTypeCode: layers.ICMPv4CodeTTLExceeded, + }, + { + description: "successful TCP parsing returns IP, port, and type code", + ctxTimeout: 500 * time.Millisecond, + conn: &mockRawConn{ + header: createMockIPv4Header(dstIP, srcIP, 6), + payload: tcpBytes, + }, + localIP: srcIP, + localPort: 12345, + remoteIP: dstIP, + remotePort: 443, + seqNum: 28394, + listener: "tcp", + expectedIP: dstIP, + expectedPort: 443, + expectedTypeCode: 0, + }, + } + + for _, test := range tt { + t.Run(test.description, func(t *testing.T) { + ctx, 
cancel := context.WithTimeout(context.Background(), test.ctxTimeout) + defer cancel() + actualIP, actualPort, actualTypeCode, _, err := handlePackets(ctx, test.conn, test.listener, test.localIP, test.localPort, test.remoteIP, test.remotePort, test.seqNum) + if test.errMsg != "" { + require.Error(t, err) + assert.True(t, strings.Contains(err.Error(), test.errMsg)) + return + } + require.NoError(t, err) + assert.Truef(t, test.expectedIP.Equal(actualIP), "mismatch source IPs: expected %s, got %s", test.expectedIP.String(), actualIP.String()) + assert.Equal(t, test.expectedPort, actualPort) + assert.Equal(t, test.expectedTypeCode, actualTypeCode) + }) + } +} + +func (m *mockRawConn) SetReadDeadline(t time.Time) error { + if m.setReadDeadlineErr != nil { + return m.setReadDeadlineErr + } + m.readDeadline = t + + return nil +} + +func (m *mockRawConn) ReadFrom(_ []byte) (*ipv4.Header, []byte, *ipv4.ControlMessage, error) { + if m.readTimeoutCount > 0 { + m.readTimeoutCount-- + time.Sleep(time.Until(m.readDeadline)) + return nil, nil, nil, &net.OpError{Err: mockTimeoutErr("test timeout error")} + } + if m.readFromErr != nil { + return nil, nil, nil, m.readFromErr + } + + return m.header, m.payload, m.cm, nil +} + +func (m *mockRawConn) WriteTo(_ *ipv4.Header, _ []byte, _ *ipv4.ControlMessage) error { + time.Sleep(m.writeDelay) + return m.writeToErr +} + +func (me mockTimeoutErr) Error() string { + return string(me) +} + +func (me mockTimeoutErr) Timeout() bool { + return true +} diff --git a/pkg/networkpath/traceroute/tcp/utils_windows.go b/pkg/networkpath/traceroute/tcp/utils_windows.go new file mode 100644 index 0000000000000..077495f43203e --- /dev/null +++ b/pkg/networkpath/traceroute/tcp/utils_windows.go @@ -0,0 +1,138 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. 
+ +// Package tcp adds a TCP traceroute implementation to the agent +package tcp + +import ( + "context" + "fmt" + "net" + "sync" + "time" + + "golang.org/x/net/ipv4" + "golang.org/x/sys/windows" + + "github.com/DataDog/datadog-agent/pkg/util/log" + "github.com/google/gopacket/layers" +) + +var ( + recvFrom = windows.Recvfrom +) + +// listenPackets takes in raw ICMP and TCP connections and listens for matching ICMP +// and TCP responses based on the passed in trace information. If neither listener +// receives a matching packet within the timeout, a blank response is returned. +// Once a matching packet is received by a listener, it will cause the other listener +// to be canceled, and data from the matching packet will be returned to the caller +func (w *winrawsocket) listenPackets(timeout time.Duration, localIP net.IP, localPort uint16, remoteIP net.IP, remotePort uint16, seqNum uint32) (net.IP, uint16, layers.ICMPv4TypeCode, time.Time, error) { + var icmpErr error + var wg sync.WaitGroup + var icmpIP net.IP + //var tcpIP net.IP + //var icmpCode layers.ICMPv4TypeCode + //var tcpFinished time.Time + var icmpFinished time.Time + var port uint16 + wg.Add(1) + ctx, cancel := context.WithTimeout(context.Background(), timeout) + defer cancel() + go func() { + defer wg.Done() + defer cancel() + icmpIP, _, _, icmpFinished, icmpErr = w.handlePackets(ctx, localIP, localPort, remoteIP, remotePort, seqNum) + }() + wg.Wait() + + if icmpErr != nil { + _, icmpCanceled := icmpErr.(canceledError) + if icmpCanceled { + log.Trace("timed out waiting for responses") + return net.IP{}, 0, 0, time.Time{}, nil + } + if icmpErr != nil { + log.Errorf("ICMP listener error: %s", icmpErr.Error()) + } + + return net.IP{}, 0, 0, time.Time{}, fmt.Errorf("icmp error: %w", icmpErr) + } + + // return the ICMP response + return icmpIP, port, 0, icmpFinished, nil +} + +// handlePackets in its current implementation should listen for the first matching +// packet on the connection and then return. 
If no packet is received within the +// timeout or if the listener is canceled, it should return a canceledError +func (w *winrawsocket) handlePackets(ctx context.Context, localIP net.IP, localPort uint16, remoteIP net.IP, remotePort uint16, seqNum uint32) (net.IP, uint16, layers.ICMPv4TypeCode, time.Time, error) { + buf := make([]byte, 512) + tp := newTCPParser() + for { + select { + case <-ctx.Done(): + return net.IP{}, 0, 0, time.Time{}, canceledError("listener canceled") + default: + } + + // the receive timeout is set to 100ms in the constructor, to match the + // linux side. This is a workaround for the lack of a deadline for sockets. + //err := conn.SetReadDeadline(now.Add(time.Millisecond * 100)) + n, _, err := recvFrom(w.s, buf, 0) + if err != nil { + if err == windows.WSAETIMEDOUT { + continue + } + if err == windows.WSAEMSGSIZE { + log.Warnf("Message too large for buffer") + continue + } + return nil, 0, 0, time.Time{}, err + } + log.Tracef("Got packet %+v", buf[:n]) + + if n < 20 { // min size of ipv4 header + continue + } + header, err := ipv4.ParseHeader(buf[:n]) + if err != nil { + continue + } + packet := buf[header.Len:header.TotalLen] + + // once we have a packet, take a timestamp to know when + // the response was received, if it matches, we will + // return this timestamp + received := time.Now() + // TODO: remove listener constraint and parse all packets + // in the same function return a succinct struct here + if header.Protocol == windows.IPPROTO_ICMP { + icmpResponse, err := parseICMP(header, packet) + if err != nil { + log.Tracef("failed to parse ICMP packet: %s", err.Error()) + continue + } + if icmpMatch(localIP, localPort, remoteIP, remotePort, seqNum, icmpResponse) { + return icmpResponse.SrcIP, 0, icmpResponse.TypeCode, received, nil + } + } else if header.Protocol == windows.IPPROTO_TCP { + // don't even bother parsing the packet if the src/dst ip don't match + if !localIP.Equal(header.Dst) || !remoteIP.Equal(header.Src) { + continue 
+ } + tcpResp, err := tp.parseTCP(header, packet) + if err != nil { + log.Tracef("failed to parse TCP packet: %s", err.Error()) + continue + } + if tcpMatch(localIP, localPort, remoteIP, remotePort, seqNum, tcpResp) { + return tcpResp.SrcIP, uint16(tcpResp.TCPResponse.SrcPort), 0, received, nil + } + } else { + continue + } + } +} diff --git a/pkg/networkpath/traceroute/tcp/utils_windows_test.go b/pkg/networkpath/traceroute/tcp/utils_windows_test.go new file mode 100644 index 0000000000000..6e5b2a1c81ba4 --- /dev/null +++ b/pkg/networkpath/traceroute/tcp/utils_windows_test.go @@ -0,0 +1,155 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +//go:build test + +package tcp + +import ( + "context" + "errors" + "fmt" + "net" + "strings" + "testing" + "time" + + "golang.org/x/sys/windows" + + "github.com/google/gopacket/layers" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +type ( + mockRawConn struct { + readTimeoutCount int + readTimeout time.Duration + readFromErr error + + payload []byte + } +) + +func Test_handlePackets(t *testing.T) { + _, tcpBytes := createMockTCPPacket(createMockIPv4Header(dstIP, srcIP, 6), createMockTCPLayer(443, 12345, 28394, 28395, true, true, true), true) + + tt := []struct { + description string + // input + ctxTimeout time.Duration + conn *mockRawConn + localIP net.IP + localPort uint16 + remoteIP net.IP + remotePort uint16 + seqNum uint32 + // output + expectedIP net.IP + expectedPort uint16 + expectedTypeCode layers.ICMPv4TypeCode + errMsg string + }{ + { + description: "canceled context returns canceledErr", + ctxTimeout: 300 * time.Millisecond, + conn: &mockRawConn{ + readTimeoutCount: 100, + readTimeout: 100 * time.Millisecond, + readFromErr: errors.New("bad test error"), + }, + errMsg: 
"canceled", + }, + { + description: "non-timeout read error returns an error", + ctxTimeout: 1 * time.Second, + conn: &mockRawConn{ + readFromErr: errors.New("test read error"), + }, + errMsg: "test read error", + }, + // { + // description: "failed ICMP parsing eventuallly returns cancel timeout", + // ctxTimeout: 500 * time.Millisecond, + // conn: &mockRawConn{ + // payload: nil, + // }, + // errMsg: "canceled", + // }, + // { + // description: "failed TCP parsing eventuallly returns cancel timeout", + // ctxTimeout: 500 * time.Millisecond, + // conn: &mockRawConn{ + // header: &ipv4.Header{}, + // payload: nil, + // }, + // listener: "tcp", + // errMsg: "canceled", + // }, + { + description: "successful ICMP parsing returns IP, port, and type code", + ctxTimeout: 500 * time.Millisecond, + conn: &mockRawConn{ + payload: createMockICMPPacket(createMockIPv4Layer(srcIP, dstIP, layers.IPProtocolICMPv4), createMockICMPLayer(layers.ICMPv4CodeTTLExceeded), createMockIPv4Layer(innerSrcIP, innerDstIP, layers.IPProtocolTCP), createMockTCPLayer(12345, 443, 28394, 12737, true, true, true), false), + }, + localIP: innerSrcIP, + localPort: 12345, + remoteIP: innerDstIP, + remotePort: 443, + seqNum: 28394, + expectedIP: srcIP, + expectedPort: 0, + expectedTypeCode: layers.ICMPv4CodeTTLExceeded, + }, + { + description: "successful TCP parsing returns IP, port, and type code", + ctxTimeout: 500 * time.Millisecond, + conn: &mockRawConn{ + payload: tcpBytes, + }, + localIP: srcIP, + localPort: 12345, + remoteIP: dstIP, + remotePort: 443, + seqNum: 28394, + expectedIP: dstIP, + expectedPort: 443, + expectedTypeCode: 0, + }, + } + + for _, test := range tt { + t.Run(test.description, func(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), test.ctxTimeout) + defer cancel() + recvFrom = test.conn.RecvFrom + w := &winrawsocket{} + actualIP, actualPort, actualTypeCode, _, err := w.handlePackets(ctx, test.localIP, test.localPort, test.remoteIP, test.remotePort, 
test.seqNum) + if test.errMsg != "" { + require.Error(t, err) + assert.True(t, strings.Contains(err.Error(), test.errMsg), fmt.Sprintf("expected %q, got %q", test.errMsg, err.Error())) + return + } + require.NoError(t, err) + assert.Truef(t, test.expectedIP.Equal(actualIP), "mismatch source IPs: expected %s, got %s", test.expectedIP.String(), actualIP.String()) + assert.Equal(t, test.expectedPort, actualPort) + assert.Equal(t, test.expectedTypeCode, actualTypeCode) + }) + } +} + +func (m *mockRawConn) RecvFrom(_ windows.Handle, buf []byte, _ int) (int, windows.Sockaddr, error) { + if m.readTimeoutCount > 0 { + m.readTimeoutCount-- + time.Sleep(m.readTimeout) + return 0, nil, windows.WSAETIMEDOUT + } + if m.readFromErr != nil { + return 0, nil, m.readFromErr + } + copy(buf, m.payload) + + return len(m.payload), nil, nil +} diff --git a/pkg/networkpath/traceroute/traceroute_windows.go b/pkg/networkpath/traceroute/traceroute_windows.go index 8820ea1de8549..316caabd7cb03 100644 --- a/pkg/networkpath/traceroute/traceroute_windows.go +++ b/pkg/networkpath/traceroute/traceroute_windows.go @@ -10,6 +10,7 @@ package traceroute import ( "context" "encoding/json" + "errors" "github.com/DataDog/datadog-agent/comp/core/telemetry" pkgconfigsetup "github.com/DataDog/datadog-agent/pkg/config/setup" @@ -20,7 +21,8 @@ import ( ) const ( - clientID = "traceroute-agent-windows" + clientID = "traceroute-agent-windows" + udpNotSupportedWindowsMsg = "UDP traceroute is not currently supported on Windows" ) // WindowsTraceroute defines a structure for @@ -34,6 +36,12 @@ type WindowsTraceroute struct { // based on an input configuration func New(cfg config.Config, _ telemetry.Component) (*WindowsTraceroute, error) { log.Debugf("Creating new traceroute with config: %+v", cfg) + + // UDP is not supported at the moment + if cfg.Protocol == payload.ProtocolUDP { + return nil, errors.New(udpNotSupportedWindowsMsg) + } + return &WindowsTraceroute{ cfg: cfg, }, nil diff --git 
a/pkg/sbom/collectors/collectors.go b/pkg/sbom/collectors/collectors.go index de61f115c24c0..adeec30f9f27d 100644 --- a/pkg/sbom/collectors/collectors.go +++ b/pkg/sbom/collectors/collectors.go @@ -25,6 +25,8 @@ const ( HostScanType ScanType = "host" // ContainerdCollector is the name of the containerd collector ContainerdCollector = "containerd" + // CrioCollector is the name of the crio collector + CrioCollector = "crio" // DockerCollector is the name of the docker collector DockerCollector = "docker" // HostCollector is the name of the host collector @@ -71,6 +73,11 @@ func GetContainerdScanner() Collector { return Collectors[ContainerdCollector] } +// GetCrioScanner returns the crio scanner +func GetCrioScanner() Collector { + return Collectors[CrioCollector] +} + // GetHostScanner returns the host scanner func GetHostScanner() Collector { return Collectors[HostCollector] diff --git a/pkg/sbom/collectors/crio/crio.go b/pkg/sbom/collectors/crio/crio.go new file mode 100644 index 0000000000000..92b8c5eb49f88 --- /dev/null +++ b/pkg/sbom/collectors/crio/crio.go @@ -0,0 +1,144 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +//go:build crio && trivy + +package crio + +import ( + "context" + "fmt" + "reflect" + + "github.com/DataDog/datadog-agent/comp/core/config" + workloadmeta "github.com/DataDog/datadog-agent/comp/core/workloadmeta/def" + "github.com/DataDog/datadog-agent/pkg/sbom" + "github.com/DataDog/datadog-agent/pkg/sbom/collectors" + crioUtil "github.com/DataDog/datadog-agent/pkg/util/crio" + "github.com/DataDog/datadog-agent/pkg/util/optional" + "github.com/DataDog/datadog-agent/pkg/util/trivy" +) + +const resultChanSize = 1000 + +// scanRequest defines a scan request. 
This struct should be +// hashable to be pushed in the work queue for processing. +type scanRequest struct { + imageID string +} + +// NewScanRequest creates a new scan request +func NewScanRequest(imageID string) sbom.ScanRequest { + return scanRequest{imageID: imageID} +} + +// Collector returns the collector name for the scan request +func (r scanRequest) Collector() string { + return collectors.CrioCollector +} + +// Type returns the scan request type based on ScanOptions +func (r scanRequest) Type(_ sbom.ScanOptions) string { + return sbom.ScanFilesystemType +} + +// ID returns the scan request ID +func (r scanRequest) ID() string { + return r.imageID +} + +// Collector defines a CRI-O SBOM collector +type Collector struct { + trivyCollector *trivy.Collector + resChan chan sbom.ScanResult + opts sbom.ScanOptions + crioClient crioUtil.Client + wmeta optional.Option[workloadmeta.Component] + + closed bool +} + +// CleanCache cleans the cache in the trivy collector +func (c *Collector) CleanCache() error { + return c.trivyCollector.CleanCache() +} + +// Init initializes the collector with configuration and workloadmeta component +func (c *Collector) Init(cfg config.Component, wmeta optional.Option[workloadmeta.Component]) error { + trivyCollector, err := trivy.GetGlobalCollector(cfg, wmeta) + if err != nil { + return err + } + c.wmeta = wmeta + c.trivyCollector = trivyCollector + c.opts = sbom.ScanOptionsFromConfig(cfg, true) + return nil +} + +// Scan performs the scan using CRI-O methods +func (c *Collector) Scan(ctx context.Context, request sbom.ScanRequest) sbom.ScanResult { + crioScanRequest, ok := request.(scanRequest) + if !ok { + return sbom.ScanResult{Error: fmt.Errorf("invalid request type '%s' for CRI-O collector", reflect.TypeOf(request))} + } + + if c.crioClient == nil { + cl, err := crioUtil.NewCRIOClient() + if err != nil { + return sbom.ScanResult{Error: fmt.Errorf("error creating CRI-O client: %w", err)} + } + c.crioClient = cl + } + + wmeta, ok 
:= c.wmeta.Get() + if !ok { + return sbom.ScanResult{Error: fmt.Errorf("workloadmeta store is not initialized")} + } + + imageMeta, err := wmeta.GetImage(crioScanRequest.ID()) + if err != nil { + return sbom.ScanResult{Error: fmt.Errorf("image metadata not found for image ID %s: %w", crioScanRequest.ID(), err)} + } + + scanner := c.trivyCollector.ScanCRIOImageFromOverlayFS + report, err := scanner(ctx, imageMeta, c.crioClient, c.opts) + + scanResult := sbom.ScanResult{ + Error: err, + Report: report, + ImgMeta: imageMeta, + } + + return scanResult +} + +// Type returns the container image scan type +func (c *Collector) Type() collectors.ScanType { + return collectors.ContainerImageScanType +} + +// Channel returns the channel to send scan results +func (c *Collector) Channel() chan sbom.ScanResult { + return c.resChan +} + +// Options returns the collector options +func (c *Collector) Options() sbom.ScanOptions { + return c.opts +} + +// Shutdown shuts down the collector +func (c *Collector) Shutdown() { + if c.resChan != nil && !c.closed { + close(c.resChan) + } + c.closed = true +} + +func init() { + collectors.RegisterCollector(collectors.CrioCollector, &Collector{ + resChan: make(chan sbom.ScanResult, resultChanSize), + }) +} diff --git a/pkg/sbom/collectors/crio/doc.go b/pkg/sbom/collectors/crio/doc.go new file mode 100644 index 0000000000000..f4d505ce15fd0 --- /dev/null +++ b/pkg/sbom/collectors/crio/doc.go @@ -0,0 +1,7 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. 
+ +// Package crio holds crio related files +package crio diff --git a/pkg/security/ebpf/c/include/events_definition.h b/pkg/security/ebpf/c/include/events_definition.h index 6611da172b380..ef52f5edcf386 100644 --- a/pkg/security/ebpf/c/include/events_definition.h +++ b/pkg/security/ebpf/c/include/events_definition.h @@ -324,6 +324,7 @@ struct ptrace_event_t { u32 request; u32 pid; u64 addr; + u32 ns_pid; }; struct syscall_monitor_event_t { diff --git a/pkg/security/ebpf/c/include/hooks/ptrace.h b/pkg/security/ebpf/c/include/hooks/ptrace.h index 358966d12c2ac..187d424665b4b 100644 --- a/pkg/security/ebpf/c/include/hooks/ptrace.h +++ b/pkg/security/ebpf/c/include/hooks/ptrace.h @@ -11,6 +11,7 @@ HOOK_SYSCALL_ENTRY3(ptrace, u32, request, pid_t, pid, void *, addr) { .ptrace = { .request = request, .pid = 0, // 0 in case the root ns pid resolution failed + .ns_pid = (u32)pid, .addr = (u64)addr, } }; @@ -59,6 +60,7 @@ int __attribute__((always_inline)) sys_ptrace_ret(void *ctx, int retval) { .request = syscall->ptrace.request, .pid = syscall->ptrace.pid, .addr = syscall->ptrace.addr, + .ns_pid = syscall->ptrace.ns_pid, }; struct proc_cache_t *entry = fill_process_context(&event.process); diff --git a/pkg/security/ebpf/c/include/structs/syscalls.h b/pkg/security/ebpf/c/include/structs/syscalls.h index 31a2fc2b9f003..9515fa07df56b 100644 --- a/pkg/security/ebpf/c/include/structs/syscalls.h +++ b/pkg/security/ebpf/c/include/structs/syscalls.h @@ -156,6 +156,7 @@ struct syscall_cache_t { u32 request; u32 pid; u64 addr; + u32 ns_pid; } ptrace; struct { diff --git a/pkg/security/module/ecs_tags.go b/pkg/security/module/ecs_tags.go deleted file mode 100644 index 92564bd944acf..0000000000000 --- a/pkg/security/module/ecs_tags.go +++ /dev/null @@ -1,53 +0,0 @@ -// Unless explicitly stated otherwise all files in this repository are licensed -// under the Apache License Version 2.0. -// This product includes software developed at Datadog (https://www.datadoghq.com/). 
-// Copyright 2016-present Datadog, Inc. - -//go:build docker - -// Package module holds module related files -package module - -import ( - "context" - "strings" - "time" - - ecsmeta "github.com/DataDog/datadog-agent/pkg/util/ecs/metadata" -) - -func getCurrentECSTaskTags() (map[string]string, error) { - client, err := ecsmeta.V3orV4FromCurrentTask() - if err != nil { - return nil, err - } - - ctx, cancel := context.WithTimeout(context.Background(), time.Duration(5*time.Second)) - defer cancel() - - task, err := client.GetTask(ctx) - if err != nil { - return nil, err - } - - cont, err := client.GetContainer(ctx) - if err != nil { - return nil, err - } - imageName := cont.Name - imageTag := "" - image := strings.Split(cont.Image, ":") - if len(image) == 2 { - imageName = image[0] - imageTag = image[1] - } - - return map[string]string{ - "task_name": task.Family, - "task_family": task.Family, - "task_arn": task.TaskARN, - "task_version": task.Version, - "image_name": imageName, - "image_tag": imageTag, - }, nil -} diff --git a/pkg/security/module/server.go b/pkg/security/module/server.go index 61ebecea1bbe0..207525cd4909f 100644 --- a/pkg/security/module/server.go +++ b/pkg/security/module/server.go @@ -21,7 +21,7 @@ import ( "github.com/mailru/easyjson" "go.uber.org/atomic" - "github.com/DataDog/datadog-agent/pkg/config/env" + "github.com/DataDog/datadog-agent/comp/core/tagger/types" pkgconfigsetup "github.com/DataDog/datadog-agent/pkg/config/setup" "github.com/DataDog/datadog-agent/pkg/security/common" "github.com/DataDog/datadog-agent/pkg/security/config" @@ -38,6 +38,7 @@ import ( "github.com/DataDog/datadog-agent/pkg/security/seclog" "github.com/DataDog/datadog-agent/pkg/security/serializers" "github.com/DataDog/datadog-agent/pkg/security/utils" + "github.com/DataDog/datadog-agent/pkg/util/fargate" "github.com/DataDog/datadog-agent/pkg/util/log" "github.com/DataDog/datadog-agent/pkg/util/startstop" "github.com/DataDog/datadog-agent/pkg/version" @@ -62,7 +63,8 @@ 
type pendingMsg struct { func (p *pendingMsg) isResolved() bool { for _, report := range p.actionReports { - if !report.IsResolved() { + if err := report.IsResolved(); err != nil { + seclog.Debugf("action report not resolved: %v", err) return false } } @@ -125,7 +127,6 @@ type APIServer struct { policiesStatusLock sync.RWMutex policiesStatus []*api.PolicyStatus msgSender MsgSender - ecsTags map[string]string stopChan chan struct{} stopper startstop.Stopper @@ -218,14 +219,15 @@ func (a *APIServer) dequeue(now time.Time, cb func(msg *pendingMsg) bool) { }) } -func (a *APIServer) updateMsgTags(msg *api.SecurityEventMessage) { - // apply ecs tag if possible - if a.ecsTags != nil { - for key, value := range a.ecsTags { - if !slices.ContainsFunc(msg.Tags, func(tag string) bool { - return strings.HasPrefix(tag, key+":") +func (a *APIServer) updateMsgTags(msg *api.SecurityEventMessage, includeGlobalTags bool) { + // on fargate, append global tags + if includeGlobalTags && fargate.IsFargateInstance() { + for _, tag := range a.getGlobalTags() { + key, _, _ := strings.Cut(tag, ":") + if !slices.ContainsFunc(msg.Tags, func(t string) bool { + return strings.HasPrefix(t, key+":") }) { - msg.Tags = append(msg.Tags, key+":"+value) + msg.Tags = append(msg.Tags, tag) } } } @@ -277,7 +279,7 @@ func (a *APIServer) start(ctx context.Context) { Service: msg.service, Tags: msg.tags, } - a.updateMsgTags(m) + a.updateMsgTags(m, false) a.msgSender.Send(m, a.expireEvent) @@ -403,7 +405,7 @@ func (a *APIServer) SendEvent(rule *rules.Rule, event events.Event, extTagsCb fu Service: service, Tags: tags, } - a.updateMsgTags(m) + a.updateMsgTags(m, true) a.msgSender.Send(m, a.expireEvent) } @@ -540,6 +542,21 @@ func (a *APIServer) SetCWSConsumer(consumer *CWSConsumer) { a.cwsConsumer = consumer } +func (a *APIServer) getGlobalTags() []string { + tagger := a.probe.Opts.Tagger + + if tagger == nil { + return nil + } + + globalTags, err := tagger.GlobalTags(types.OrchestratorCardinality) + if err != 
nil { + seclog.Errorf("failed to get global tags: %v", err) + return nil + } + return globalTags +} + // NewAPIServer returns a new gRPC event server func NewAPIServer(cfg *config.RuntimeSecurityConfig, probe *sprobe.Probe, msgSender MsgSender, client statsd.ClientInterface, selfTester *selftests.SelfTester) (*APIServer, error) { stopper := startstop.NewSerialStopper() @@ -574,13 +591,5 @@ func NewAPIServer(cfg *config.RuntimeSecurityConfig, probe *sprobe.Probe, msgSen } } - if env.IsECS() || env.IsECSFargate() { - tags, err := getCurrentECSTaskTags() - if err != nil { - return nil, err - } - as.ecsTags = tags - } - return as, nil } diff --git a/pkg/security/probe/actions.go b/pkg/security/probe/actions.go index 3e53dcc77c6a8..24af2b8b6a5f6 100644 --- a/pkg/security/probe/actions.go +++ b/pkg/security/probe/actions.go @@ -9,6 +9,7 @@ package probe import ( + "fmt" "sync" "time" @@ -62,12 +63,15 @@ type JKillActionReport struct { } // IsResolved return if the action is resolved -func (k *KillActionReport) IsResolved() bool { +func (k *KillActionReport) IsResolved() error { k.RLock() defer k.RUnlock() // for sigkill wait for exit - return k.Signal != "SIGKILL" || k.resolved || k.Status == KillActionStatusRuleDisarmed + if k.Signal != "SIGKILL" || k.resolved || k.Status == KillActionStatusRuleDisarmed { + return nil + } + return fmt.Errorf("kill action current state: %+v", k) } // ToJSON marshal the action diff --git a/pkg/security/probe/actions_linux.go b/pkg/security/probe/actions_linux.go index 54126bb2b12f5..fe2ac84377d97 100644 --- a/pkg/security/probe/actions_linux.go +++ b/pkg/security/probe/actions_linux.go @@ -9,6 +9,7 @@ package probe import ( + "fmt" "sync" "time" @@ -48,11 +49,15 @@ type HashActionReport struct { } // IsResolved return if the action is resolved -func (k *HashActionReport) IsResolved() bool { +func (k *HashActionReport) IsResolved() error { k.RLock() defer k.RUnlock() - return k.resolved + if k.resolved { + return nil + } + + return 
fmt.Errorf("hash action current state: %+v", k) } // ToJSON marshal the action diff --git a/pkg/security/probe/custom_events.go b/pkg/security/probe/custom_events.go index 13c31b8b4bc47..6d1d61bc83a93 100644 --- a/pkg/security/probe/custom_events.go +++ b/pkg/security/probe/custom_events.go @@ -11,11 +11,14 @@ package probe import ( + coretags "github.com/DataDog/datadog-agent/comp/core/tagger/tags" "github.com/DataDog/datadog-agent/pkg/process/procutil" "github.com/DataDog/datadog-agent/pkg/security/events" "github.com/DataDog/datadog-agent/pkg/security/proto/ebpfless" + "github.com/DataDog/datadog-agent/pkg/security/resolvers/tags" "github.com/DataDog/datadog-agent/pkg/security/secl/model" "github.com/DataDog/datadog-agent/pkg/security/secl/rules" + "github.com/DataDog/datadog-agent/pkg/security/seclog" "github.com/DataDog/datadog-agent/pkg/security/serializers" "github.com/DataDog/datadog-agent/pkg/security/utils" ) @@ -71,7 +74,7 @@ func (e EBPFLessHelloMsgEvent) ToJSON() ([]byte, error) { } // NewEBPFLessHelloMsgEvent returns a eBPFLess hello custom event -func NewEBPFLessHelloMsgEvent(acc *events.AgentContainerContext, msg *ebpfless.HelloMsg, scrubber *procutil.DataScrubber) (*rules.Rule, *events.CustomEvent) { +func NewEBPFLessHelloMsgEvent(acc *events.AgentContainerContext, msg *ebpfless.HelloMsg, scrubber *procutil.DataScrubber, tagger tags.Tagger) (*rules.Rule, *events.CustomEvent) { args := msg.EntrypointArgs if scrubber != nil { args, _ = scrubber.ScrubCommand(msg.EntrypointArgs) @@ -81,9 +84,18 @@ func NewEBPFLessHelloMsgEvent(acc *events.AgentContainerContext, msg *ebpfless.H NSID: msg.NSID, } evt.Container.ID = msg.ContainerContext.ID - evt.Container.Name = msg.ContainerContext.Name - evt.Container.ImageShortName = msg.ContainerContext.ImageShortName - evt.Container.ImageTag = msg.ContainerContext.ImageTag + + if tagger != nil { + tags, err := tags.GetTagsOfContainer(tagger, msg.ContainerContext.ID) + if err != nil { + seclog.Errorf("Failed to get 
tags for container %s: %v", msg.ContainerContext.ID, err) + } else { + evt.Container.Name = utils.GetTagValue(coretags.EcsContainerName, tags) + evt.Container.ImageShortName = utils.GetTagValue(coretags.ShortImage, tags) + evt.Container.ImageTag = utils.GetTagValue(coretags.ImageTag, tags) + } + } + evt.EntrypointArgs = args evt.FillCustomEventCommonFields(acc) diff --git a/pkg/security/probe/field_handlers_ebpf.go b/pkg/security/probe/field_handlers_ebpf.go index 98dd813e7b55a..12749266a8a56 100644 --- a/pkg/security/probe/field_handlers_ebpf.go +++ b/pkg/security/probe/field_handlers_ebpf.go @@ -577,9 +577,8 @@ func (fh *EBPFFieldHandlers) ResolveContainerCreatedAt(ev *model.Event, e *model // ResolveContainerTags resolves the container tags of the event func (fh *EBPFFieldHandlers) ResolveContainerTags(_ *model.Event, e *model.ContainerContext) []string { - if (!e.TagsResolved || len(e.Tags) == 0) && e.ContainerID != "" { + if len(e.Tags) == 0 && e.ContainerID != "" { e.Tags = fh.resolvers.TagsResolver.Resolve(string(e.ContainerID)) - e.TagsResolved = true } return e.Tags } diff --git a/pkg/security/probe/field_handlers_ebpfless.go b/pkg/security/probe/field_handlers_ebpfless.go index f5f7de0f6549d..1412808889772 100644 --- a/pkg/security/probe/field_handlers_ebpfless.go +++ b/pkg/security/probe/field_handlers_ebpfless.go @@ -197,10 +197,8 @@ func (fh *EBPFLessFieldHandlers) ResolveContainerCreatedAt(ev *model.Event, e *m // ResolveContainerTags resolves the container tags of the event func (fh *EBPFLessFieldHandlers) ResolveContainerTags(_ *model.Event, e *model.ContainerContext) []string { - // e.Tags is never empty because of image name and tag - if (!e.TagsResolved) && e.ContainerID != "" { + if len(e.Tags) == 0 && e.ContainerID != "" { e.Tags = fh.resolvers.TagsResolver.Resolve(string(e.ContainerID)) - e.TagsResolved = true } return e.Tags } diff --git a/pkg/security/probe/opts_others.go b/pkg/security/probe/opts_others.go index 59cf8dccb1453..03c5f82382a78 
100644 --- a/pkg/security/probe/opts_others.go +++ b/pkg/security/probe/opts_others.go @@ -8,8 +8,12 @@ // Package probe holds probe related files package probe +import "github.com/DataDog/datadog-agent/pkg/security/resolvers/tags" + // Opts defines some probe options type Opts struct { // DontDiscardRuntime do not discard the runtime. Mostly used by functional tests DontDiscardRuntime bool + // Tagger will override the default one. Mainly here for tests. + Tagger tags.Tagger } diff --git a/pkg/security/probe/probe_ebpf.go b/pkg/security/probe/probe_ebpf.go index ad07856a00e42..6db379021bbc7 100644 --- a/pkg/security/probe/probe_ebpf.go +++ b/pkg/security/probe/probe_ebpf.go @@ -32,7 +32,6 @@ import ( manager "github.com/DataDog/ebpf-manager" "github.com/DataDog/ebpf-manager/tracefs" - "github.com/DataDog/datadog-agent/comp/core/telemetry" "github.com/DataDog/datadog-agent/pkg/config/env" ddebpf "github.com/DataDog/datadog-agent/pkg/ebpf" ebpftelemetry "github.com/DataDog/datadog-agent/pkg/ebpf/telemetry" @@ -480,7 +479,6 @@ func (p *EBPFProbe) Setup() error { if err := p.Manager.Start(); err != nil { return err } - ddebpf.AddNameMappings(p.Manager, "cws") p.applyDefaultFilterPolicies() @@ -686,6 +684,9 @@ func (p *EBPFProbe) setProcessContext(eventType model.EventType, event *model.Ev panic("should always return a process context") } + // do the same with cgroup context + event.CGroupContext = event.ProcessCacheEntry.CGroup + if process.IsKThread(event.ProcessContext.PPid, event.ProcessContext.Pid) { return false } @@ -1073,14 +1074,42 @@ func (p *EBPFProbe) handleEvent(CPU int, data []byte) { seclog.Errorf("failed to decode ptrace event: %s (offset %d, len %d)", err, offset, len(data)) return } + // resolve tracee process context var pce *model.ProcessCacheEntry - if event.PTrace.PID == 0 { // pid can be 0 for a PTRACE_TRACEME request + if event.PTrace.Request == unix.PTRACE_TRACEME { // pid can be 0 for a PTRACE_TRACEME request pce = 
newPlaceholderProcessCacheEntryPTraceMe() + } else if event.PTrace.PID == 0 && event.PTrace.NSPID == 0 { + seclog.Errorf("ptrace event without any PID to resolve") + return } else { - pce = p.Resolvers.ProcessResolver.Resolve(event.PTrace.PID, event.PTrace.PID, 0, false, newEntryCb) + pidToResolve := event.PTrace.PID + + if pidToResolve == 0 { // resolve the PID given as argument instead + if event.ContainerContext.ContainerID == "" { + pidToResolve = event.PTrace.NSPID + } else { + // 1. get the pid namespace of the tracer + ns, err := utils.GetProcessPidNamespace(event.ProcessContext.Process.Pid) + if err != nil { + seclog.Errorf("Failed to resolve PID namespace: %v", err) + return + } + + // 2. find the host pid matching the arg pid with the tracer namespace + pid, err := utils.FindPidNamespace(event.PTrace.NSPID, ns) + if err != nil { + seclog.Warnf("Failed to resolve tracee PID namespace: %v", err) + return + } + + pidToResolve = pid + } + } + + pce = p.Resolvers.ProcessResolver.Resolve(pidToResolve, pidToResolve, 0, false, newEntryCb) if pce == nil { - pce = model.NewPlaceholderProcessCacheEntry(event.PTrace.PID, event.PTrace.PID, false) + pce = model.NewPlaceholderProcessCacheEntry(pidToResolve, pidToResolve, false) } } event.PTrace.Tracee = &pce.ProcessContext @@ -1495,7 +1524,52 @@ func (p *EBPFProbe) updateProbes(ruleEventTypes []eval.EventType, needRawSyscall return fmt.Errorf("failed to set enabled events: %w", err) } - return p.Manager.UpdateActivatedProbes(activatedProbes) + previousProbes := p.computeProbesIDs() + if err = p.Manager.UpdateActivatedProbes(activatedProbes); err != nil { + return err + } + newProbes := p.computeProbesIDs() + + p.computeProbesDiffAndRemoveMapping(previousProbes, newProbes) + return nil +} + +func (p *EBPFProbe) computeProbesIDs() map[string]lib.ProgramID { + out := make(map[string]lib.ProgramID) + progList, err := p.Manager.GetPrograms() + if err != nil { + return out + } + for funcName, prog := range progList { + 
programInfo, err := prog.Info() + if err != nil { + continue + } + + programID, isAvailable := programInfo.ID() + if isAvailable { + out[funcName] = programID + } + } + + return out +} + +func (p *EBPFProbe) computeProbesDiffAndRemoveMapping(previousProbes map[string]lib.ProgramID, newProbes map[string]lib.ProgramID) { + // Compute the list of programs that need to be deleted from the ddebpf mapping + var toDelete []lib.ProgramID + for previousProbeFuncName, programID := range previousProbes { + if _, ok := newProbes[previousProbeFuncName]; !ok { + toDelete = append(toDelete, programID) + } + } + + for _, id := range toDelete { + ddebpf.RemoveProgramID(uint32(id), "cws") + } + + // new programs could have been introduced during the update, add them now + ddebpf.AddNameMappings(p.Manager, "cws") } // GetDiscarders retrieve the discarders @@ -1852,7 +1926,7 @@ func (p *EBPFProbe) EnableEnforcement(state bool) { } // NewEBPFProbe instantiates a new runtime security agent probe -func NewEBPFProbe(probe *Probe, config *config.Config, opts Opts, telemetry telemetry.Component) (*EBPFProbe, error) { +func NewEBPFProbe(probe *Probe, config *config.Config, opts Opts) (*EBPFProbe, error) { nerpc, err := erpc.NewERPC() if err != nil { return nil, err @@ -2116,7 +2190,7 @@ func NewEBPFProbe(probe *Probe, config *config.Config, opts Opts, telemetry tele TTYFallbackEnabled: probe.Opts.TTYFallbackEnabled, } - p.Resolvers, err = resolvers.NewEBPFResolvers(config, p.Manager, probe.StatsdClient, probe.scrubber, p.Erpc, resolversOpts, telemetry) + p.Resolvers, err = resolvers.NewEBPFResolvers(config, p.Manager, probe.StatsdClient, probe.scrubber, p.Erpc, resolversOpts) if err != nil { return nil, err } diff --git a/pkg/security/probe/probe_ebpfless.go b/pkg/security/probe/probe_ebpfless.go index 124f295a03131..aea829794c956 100644 --- a/pkg/security/probe/probe_ebpfless.go +++ b/pkg/security/probe/probe_ebpfless.go @@ -25,7 +25,6 @@ import ( "github.com/DataDog/datadog-go/v5/statsd" - 
"github.com/DataDog/datadog-agent/comp/core/telemetry" "github.com/DataDog/datadog-agent/pkg/security/config" "github.com/DataDog/datadog-agent/pkg/security/events" "github.com/DataDog/datadog-agent/pkg/security/probe/kfilters" @@ -97,15 +96,14 @@ func (p *EBPFLessProbe) handleClientMsg(cl *client, msg *ebpfless.Message) { case ebpfless.MessageTypeHello: if cl.nsID == 0 { p.probe.DispatchCustomEvent( - NewEBPFLessHelloMsgEvent(p.GetAgentContainerContext(), msg.Hello, p.probe.scrubber), + NewEBPFLessHelloMsgEvent(p.GetAgentContainerContext(), msg.Hello, p.probe.scrubber, p.probe.Opts.Tagger), ) cl.nsID = msg.Hello.NSID if msg.Hello.ContainerContext != nil { cl.containerID = msg.Hello.ContainerContext.ID - cl.containerName = msg.Hello.ContainerContext.Name p.containerContexts[msg.Hello.ContainerContext.ID] = msg.Hello.ContainerContext - seclog.Infof("tracing started for container ID [%s] (Name: [%s]) with entrypoint %q", msg.Hello.ContainerContext.ID, msg.Hello.ContainerContext.Name, msg.Hello.EntrypointArgs) + seclog.Infof("tracing started for container ID [%s] with entrypoint %q", msg.Hello.ContainerContext.ID, msg.Hello.EntrypointArgs) } } case ebpfless.MessageTypeSyscall: @@ -304,10 +302,6 @@ func (p *EBPFLessProbe) handleSyscallMsg(cl *client, syscallMsg *ebpfless.Syscal event.ContainerContext.ContainerID = containerutils.ContainerID(syscallMsg.ContainerID) if containerContext, exists := p.containerContexts[syscallMsg.ContainerID]; exists { event.ContainerContext.CreatedAt = containerContext.CreatedAt - event.ContainerContext.Tags = []string{ - "image_name:" + containerContext.ImageShortName, - "image_tag:" + containerContext.ImageTag, - } } // copy span context if any @@ -660,7 +654,7 @@ func (p *EBPFLessProbe) GetAgentContainerContext() *events.AgentContainerContext } // NewEBPFLessProbe returns a new eBPF less probe -func NewEBPFLessProbe(probe *Probe, config *config.Config, opts Opts, telemetry telemetry.Component) (*EBPFLessProbe, error) { +func 
NewEBPFLessProbe(probe *Probe, config *config.Config, opts Opts) (*EBPFLessProbe, error) { opts.normalize() processKiller, err := NewProcessKiller(config) @@ -688,7 +682,7 @@ func NewEBPFLessProbe(probe *Probe, config *config.Config, opts Opts, telemetry Tagger: opts.Tagger, } - p.Resolvers, err = resolvers.NewEBPFLessResolvers(config, p.statsdClient, probe.scrubber, resolversOpts, telemetry) + p.Resolvers, err = resolvers.NewEBPFLessResolvers(config, p.statsdClient, probe.scrubber, resolversOpts) if err != nil { return nil, err } diff --git a/pkg/security/probe/probe_linux.go b/pkg/security/probe/probe_linux.go index 4f56171c12892..83c2c4800215d 100644 --- a/pkg/security/probe/probe_linux.go +++ b/pkg/security/probe/probe_linux.go @@ -7,7 +7,6 @@ package probe import ( - "github.com/DataDog/datadog-agent/comp/core/telemetry" "github.com/DataDog/datadog-agent/pkg/security/config" "github.com/DataDog/datadog-agent/pkg/security/ebpf/kernel" "github.com/DataDog/datadog-agent/pkg/security/events" @@ -23,7 +22,7 @@ const ( ) // NewProbe instantiates a new runtime security agent probe -func NewProbe(config *config.Config, opts Opts, telemetry telemetry.Component) (*Probe, error) { +func NewProbe(config *config.Config, opts Opts) (*Probe, error) { opts.normalize() p := newProbe(config, opts) @@ -34,14 +33,14 @@ func NewProbe(config *config.Config, opts Opts, telemetry telemetry.Component) ( } if opts.EBPFLessEnabled { - pp, err := NewEBPFLessProbe(p, config, opts, telemetry) + pp, err := NewEBPFLessProbe(p, config, opts) if err != nil { return nil, err } p.PlatformProbe = pp p.agentContainerContext = acc } else { - pp, err := NewEBPFProbe(p, config, opts, telemetry) + pp, err := NewEBPFProbe(p, config, opts) if err != nil { return nil, err } diff --git a/pkg/security/probe/probe_windows.go b/pkg/security/probe/probe_windows.go index 02761f8d95fd0..a1aea80b06594 100644 --- a/pkg/security/probe/probe_windows.go +++ b/pkg/security/probe/probe_windows.go @@ -18,7 +18,6 @@ 
import ( "github.com/cenkalti/backoff/v4" lru "github.com/hashicorp/golang-lru/v2" - "github.com/DataDog/datadog-agent/comp/core/telemetry" "github.com/DataDog/datadog-agent/comp/etw" etwimpl "github.com/DataDog/datadog-agent/comp/etw/impl" "github.com/DataDog/datadog-agent/pkg/security/config" @@ -1290,7 +1289,7 @@ func initializeWindowsProbe(config *config.Config, opts Opts) (*WindowsProbe, er } // NewWindowsProbe instantiates a new runtime security agent probe -func NewWindowsProbe(probe *Probe, config *config.Config, opts Opts, telemetry telemetry.Component) (*WindowsProbe, error) { +func NewWindowsProbe(probe *Probe, config *config.Config, opts Opts) (*WindowsProbe, error) { p, err := initializeWindowsProbe(config, opts) if err != nil { return nil, err @@ -1300,7 +1299,7 @@ func NewWindowsProbe(probe *Probe, config *config.Config, opts Opts, telemetry t resolversOpts := resolvers.Opts{ Tagger: probe.Opts.Tagger, } - p.Resolvers, err = resolvers.NewResolvers(config, p.statsdClient, probe.scrubber, telemetry, resolversOpts) + p.Resolvers, err = resolvers.NewResolvers(config, p.statsdClient, probe.scrubber, resolversOpts) if err != nil { return nil, err } @@ -1477,12 +1476,12 @@ func (p *WindowsProbe) EnableEnforcement(state bool) { } // NewProbe instantiates a new runtime security agent probe -func NewProbe(config *config.Config, opts Opts, telemetry telemetry.Component) (*Probe, error) { +func NewProbe(config *config.Config, opts Opts) (*Probe, error) { opts.normalize() p := newProbe(config, opts) - pp, err := NewWindowsProbe(p, config, opts, telemetry) + pp, err := NewWindowsProbe(p, config, opts) if err != nil { return nil, err } diff --git a/pkg/security/proto/ebpfless/msg.go b/pkg/security/proto/ebpfless/msg.go index b683da4dff0f1..7ca6d7a8d5888 100644 --- a/pkg/security/proto/ebpfless/msg.go +++ b/pkg/security/proto/ebpfless/msg.go @@ -93,11 +93,8 @@ const ( // ContainerContext defines a container context type ContainerContext struct { - ID string - Name 
string - ImageShortName string - ImageTag string - CreatedAt uint64 + ID string + CreatedAt uint64 } // FcntlSyscallMsg defines a fcntl message diff --git a/pkg/security/ptracer/container_context.go b/pkg/security/ptracer/container_context.go index e6a1375aaf6cd..27af2e24ef269 100644 --- a/pkg/security/ptracer/container_context.go +++ b/pkg/security/ptracer/container_context.go @@ -13,16 +13,12 @@ import ( "time" "github.com/DataDog/datadog-agent/pkg/security/proto/ebpfless" - "github.com/DataDog/datadog-agent/pkg/util/containers/image" ) // ECSMetadata defines ECS metadata // https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-metadata-endpoint-v4.html type ECSMetadata struct { - DockerID string `json:"DockerId"` - DockerName string `json:"DockerName"` - Name string `json:"Name"` - Image string `json:"Image"` + DockerID string `json:"DockerId"` } func retrieveECSMetadata(url string) (*ECSMetadata, error) { @@ -43,10 +39,6 @@ func retrieveEnvMetadata(ctx *ebpfless.ContainerContext) { if id := os.Getenv("DD_CONTAINER_ID"); id != "" { ctx.ID = id } - - if name := os.Getenv("DD_CONTAINER_NAME"); name != "" { - ctx.Name = name - } } func newContainerContext(containerID string) (*ebpfless.ContainerContext, error) { @@ -64,20 +56,6 @@ func newContainerContext(containerID string) (*ebpfless.ContainerContext, error) // only set the container ID if we previously failed to retrieve it from proc ctx.ID = data.DockerID } - if data.DockerName != "" { - ctx.Name = data.DockerName - } - if data.Image != "" { - _, _, shortImageName, tag, err := image.SplitImageName(data.Image) - if err == nil { - ctx.ImageShortName = shortImageName - if tag != "" { - ctx.ImageTag = tag - } else { - ctx.ImageTag = "latest" - } - } - } } } retrieveEnvMetadata(ctx) diff --git a/pkg/security/resolvers/resolvers_ebpf.go b/pkg/security/resolvers/resolvers_ebpf.go index 39343334d1b8d..3326a9ba50d27 100644 --- a/pkg/security/resolvers/resolvers_ebpf.go +++ 
b/pkg/security/resolvers/resolvers_ebpf.go @@ -17,7 +17,6 @@ import ( "github.com/DataDog/datadog-go/v5/statsd" manager "github.com/DataDog/ebpf-manager" - "github.com/DataDog/datadog-agent/comp/core/telemetry" "github.com/DataDog/datadog-agent/pkg/process/procutil" "github.com/DataDog/datadog-agent/pkg/security/config" "github.com/DataDog/datadog-agent/pkg/security/probe/erpc" @@ -64,7 +63,7 @@ type EBPFResolvers struct { } // NewEBPFResolvers creates a new instance of EBPFResolvers -func NewEBPFResolvers(config *config.Config, manager *manager.Manager, statsdClient statsd.ClientInterface, scrubber *procutil.DataScrubber, eRPC *erpc.ERPC, opts Opts, telemetry telemetry.Component) (*EBPFResolvers, error) { +func NewEBPFResolvers(config *config.Config, manager *manager.Manager, statsdClient statsd.ClientInterface, scrubber *procutil.DataScrubber, eRPC *erpc.ERPC, opts Opts) (*EBPFResolvers, error) { dentryResolver, err := dentry.NewResolver(config.Probe, statsdClient, eRPC) if err != nil { return nil, err @@ -96,7 +95,7 @@ func NewEBPFResolvers(config *config.Config, manager *manager.Manager, statsdCli return nil, err } - tagsResolver := tags.NewResolver(telemetry, opts.Tagger, cgroupsResolver) + tagsResolver := tags.NewResolver(opts.Tagger, cgroupsResolver) userGroupResolver, err := usergroup.NewResolver(cgroupsResolver) if err != nil { diff --git a/pkg/security/resolvers/resolvers_ebpfless.go b/pkg/security/resolvers/resolvers_ebpfless.go index 6713106a1f86c..25c799fbfa06f 100644 --- a/pkg/security/resolvers/resolvers_ebpfless.go +++ b/pkg/security/resolvers/resolvers_ebpfless.go @@ -13,7 +13,6 @@ import ( "github.com/DataDog/datadog-go/v5/statsd" - "github.com/DataDog/datadog-agent/comp/core/telemetry" "github.com/DataDog/datadog-agent/pkg/process/procutil" "github.com/DataDog/datadog-agent/pkg/security/config" "github.com/DataDog/datadog-agent/pkg/security/resolvers/cgroup" @@ -32,13 +31,13 @@ type EBPFLessResolvers struct { } // NewEBPFLessResolvers creates a 
new instance of EBPFLessResolvers -func NewEBPFLessResolvers(config *config.Config, statsdClient statsd.ClientInterface, scrubber *procutil.DataScrubber, opts Opts, telemetry telemetry.Component) (*EBPFLessResolvers, error) { +func NewEBPFLessResolvers(config *config.Config, statsdClient statsd.ClientInterface, scrubber *procutil.DataScrubber, opts Opts) (*EBPFLessResolvers, error) { cgroupsResolver, err := cgroup.NewResolver() if err != nil { return nil, err } - tagsResolver := tags.NewResolver(telemetry, opts.Tagger, cgroupsResolver) + tagsResolver := tags.NewResolver(opts.Tagger, cgroupsResolver) processOpts := process.NewResolverOpts() processOpts.WithEnvsValue(config.Probe.EnvsWithValue) diff --git a/pkg/security/resolvers/resolvers_windows.go b/pkg/security/resolvers/resolvers_windows.go index 1ff1d0354bb94..6b90baae8a00d 100644 --- a/pkg/security/resolvers/resolvers_windows.go +++ b/pkg/security/resolvers/resolvers_windows.go @@ -9,7 +9,6 @@ package resolvers import ( "github.com/DataDog/datadog-go/v5/statsd" - "github.com/DataDog/datadog-agent/comp/core/telemetry" "github.com/DataDog/datadog-agent/pkg/process/procutil" "github.com/DataDog/datadog-agent/pkg/security/config" "github.com/DataDog/datadog-agent/pkg/security/resolvers/process" @@ -29,13 +28,13 @@ type Resolvers struct { } // NewResolvers creates a new instance of Resolvers -func NewResolvers(config *config.Config, statsdClient statsd.ClientInterface, scrubber *procutil.DataScrubber, telemetry telemetry.Component, opts Opts) (*Resolvers, error) { +func NewResolvers(config *config.Config, statsdClient statsd.ClientInterface, scrubber *procutil.DataScrubber, opts Opts) (*Resolvers, error) { processResolver, err := process.NewResolver(config, statsdClient, scrubber, process.NewResolverOpts()) if err != nil { return nil, err } - tagsResolver := tags.NewResolver(telemetry, opts.Tagger) + tagsResolver := tags.NewResolver(opts.Tagger) userSessionsResolver, err := 
usersessions.NewResolver(config.RuntimeSecurity) if err != nil { diff --git a/pkg/security/resolvers/tags/resolver.go b/pkg/security/resolvers/tags/resolver.go index 1ee12e3bc00f3..99800bcbe4554 100644 --- a/pkg/security/resolvers/tags/resolver.go +++ b/pkg/security/resolvers/tags/resolver.go @@ -8,15 +8,9 @@ package tags import ( "context" - "fmt" - coreconfig "github.com/DataDog/datadog-agent/comp/core/config" - taggerdef "github.com/DataDog/datadog-agent/comp/core/tagger/def" - remotetagger "github.com/DataDog/datadog-agent/comp/core/tagger/impl-remote" "github.com/DataDog/datadog-agent/comp/core/tagger/types" - "github.com/DataDog/datadog-agent/comp/core/telemetry" - "github.com/DataDog/datadog-agent/pkg/api/security" - pkgconfigsetup "github.com/DataDog/datadog-agent/pkg/config/setup" + "github.com/DataDog/datadog-agent/pkg/security/seclog" "github.com/DataDog/datadog-agent/pkg/security/utils" "github.com/DataDog/datadog-agent/pkg/util/log" ) @@ -34,6 +28,7 @@ type Tagger interface { Start(ctx context.Context) error Stop() error Tag(entity types.EntityID, cardinality types.TagCardinality) ([]string, error) + GlobalTags(cardinality types.TagCardinality) ([]string, error) } // Resolver represents a cache resolver @@ -58,8 +53,18 @@ func (t *DefaultResolver) Resolve(id string) []string { // ResolveWithErr returns the tags for the given id func (t *DefaultResolver) ResolveWithErr(id string) ([]string, error) { - entityID := types.NewEntityID(types.ContainerID, id) - return t.tagger.Tag(entityID, types.OrchestratorCardinality) + return GetTagsOfContainer(t.tagger, id) +} + +// GetTagsOfContainer returns the tags for the given container id +// exported to share the code with other non-resolver users of tagger +func GetTagsOfContainer(tagger Tagger, containerID string) ([]string, error) { + if tagger == nil { + return nil, nil + } + + entityID := types.NewEntityID(types.ContainerID, containerID) + return tagger.Tag(entityID, types.OrchestratorCardinality) } // 
GetValue return the tag value for the given id and tag name @@ -69,6 +74,10 @@ func (t *DefaultResolver) GetValue(id string, tag string) string { // Start the resolver func (t *DefaultResolver) Start(ctx context.Context) error { + if t.tagger == nil { + return nil + } + go func() { if err := t.tagger.Start(ctx); err != nil { log.Errorf("failed to init tagger: %s", err) @@ -85,29 +94,19 @@ func (t *DefaultResolver) Start(ctx context.Context) error { // Stop the resolver func (t *DefaultResolver) Stop() error { + if t.tagger == nil { + return nil + } return t.tagger.Stop() } // NewDefaultResolver returns a new default tags resolver -func NewDefaultResolver(telemetry telemetry.Component, tagger Tagger) *DefaultResolver { - ddConfig := pkgconfigsetup.Datadog() - resolver := &DefaultResolver{ - tagger: tagger, - } - - params := taggerdef.RemoteParams{ - RemoteFilter: types.NewMatchAllFilter(), - RemoteTarget: func(c coreconfig.Component) (string, error) { return fmt.Sprintf(":%v", c.GetInt("cmd_port")), nil }, - RemoteTokenFetcher: func(c coreconfig.Component) func() (string, error) { - return func() (string, error) { - return security.FetchAuthToken(c) - } - }, - } - +func NewDefaultResolver(tagger Tagger) *DefaultResolver { if tagger == nil { - resolver.tagger, _ = remotetagger.NewRemoteTagger(params, ddConfig, log.NewWrapper(2), telemetry) + seclog.Errorf("initializing tags resolver with nil tagger") } - return resolver + return &DefaultResolver{ + tagger: tagger, + } } diff --git a/pkg/security/resolvers/tags/resolver_linux.go b/pkg/security/resolvers/tags/resolver_linux.go index 7730902212f87..b56ad3b0438d1 100644 --- a/pkg/security/resolvers/tags/resolver_linux.go +++ b/pkg/security/resolvers/tags/resolver_linux.go @@ -11,7 +11,6 @@ import ( "fmt" "time" - "github.com/DataDog/datadog-agent/comp/core/telemetry" "github.com/DataDog/datadog-agent/pkg/security/resolvers/cgroup" cgroupModel "github.com/DataDog/datadog-agent/pkg/security/resolvers/cgroup/model" 
"github.com/DataDog/datadog-agent/pkg/security/utils" @@ -89,10 +88,10 @@ func (t *LinuxResolver) fetchTags(container *cgroupModel.CacheEntry) error { } // NewResolver returns a new tags resolver -func NewResolver(telemetry telemetry.Component, tagger Tagger, cgroupsResolver *cgroup.Resolver) *LinuxResolver { +func NewResolver(tagger Tagger, cgroupsResolver *cgroup.Resolver) *LinuxResolver { resolver := &LinuxResolver{ Notifier: utils.NewNotifier[Event, *cgroupModel.CacheEntry](), - DefaultResolver: NewDefaultResolver(telemetry, tagger), + DefaultResolver: NewDefaultResolver(tagger), workloadsWithoutTags: make(chan *cgroupModel.CacheEntry, 100), cgroupResolver: cgroupsResolver, } diff --git a/pkg/security/resolvers/tags/resolver_other.go b/pkg/security/resolvers/tags/resolver_other.go index d00f07ce02113..aced8e0a0d229 100644 --- a/pkg/security/resolvers/tags/resolver_other.go +++ b/pkg/security/resolvers/tags/resolver_other.go @@ -8,11 +8,7 @@ // Package tags holds tags related files package tags -import ( - "github.com/DataDog/datadog-agent/comp/core/telemetry" -) - // NewResolver returns a new tags resolver -func NewResolver(telemetry telemetry.Component, tagger Tagger) Resolver { - return NewDefaultResolver(telemetry, tagger) +func NewResolver(tagger Tagger) Resolver { + return NewDefaultResolver(tagger) } diff --git a/pkg/security/resolvers/tc/resolver.go b/pkg/security/resolvers/tc/resolver.go index eef9bbf1dfde9..3dc1f568b036c 100644 --- a/pkg/security/resolvers/tc/resolver.go +++ b/pkg/security/resolvers/tc/resolver.go @@ -149,6 +149,7 @@ func (tcr *Resolver) FlushNetworkNamespaceID(namespaceID uint32, m *manager.Mana for tcKey, tcProbe := range tcr.programs { if tcKey.NetDevice.NetNS == namespaceID { + ddebpf.RemoveProgramID(tcProbe.ID(), "cws") _ = m.DetachHook(tcProbe.ProbeIdentificationPair) delete(tcr.programs, tcKey) } @@ -166,6 +167,7 @@ func (tcr *Resolver) FlushInactiveProbes(m *manager.Manager, isLazy func(string) var linkName string for tcKey, 
tcProbe := range tcr.programs { if !tcProbe.IsTCFilterActive() { + ddebpf.RemoveProgramID(tcProbe.ID(), "cws") _ = m.DetachHook(tcProbe.ProbeIdentificationPair) delete(tcr.programs, tcKey) } else { diff --git a/pkg/security/secl/model/model.go b/pkg/security/secl/model/model.go index 792098c53c681..9a6ae500f2d6f 100644 --- a/pkg/security/secl/model/model.go +++ b/pkg/security/secl/model/model.go @@ -72,12 +72,11 @@ func (r *Releasable) AppendReleaseCallback(callback func()) { // ContainerContext holds the container context of an event type ContainerContext struct { Releasable - ContainerID containerutils.ContainerID `field:"id,handler:ResolveContainerID"` // SECLDoc[id] Definition:`ID of the container` - CreatedAt uint64 `field:"created_at,handler:ResolveContainerCreatedAt"` // SECLDoc[created_at] Definition:`Timestamp of the creation of the container`` - Tags []string `field:"tags,handler:ResolveContainerTags,opts:skip_ad,weight:9999"` // SECLDoc[tags] Definition:`Tags of the container` - TagsResolved bool `field:"-"` - Resolved bool `field:"-"` - Runtime string `field:"runtime,handler:ResolveContainerRuntime"` // SECLDoc[runtime] Definition:`Runtime managing the container` + ContainerID containerutils.ContainerID `field:"id,handler:ResolveContainerID"` // SECLDoc[id] Definition:`ID of the container` + CreatedAt uint64 `field:"created_at,handler:ResolveContainerCreatedAt"` // SECLDoc[created_at] Definition:`Timestamp of the creation of the container`` + Tags []string `field:"tags,handler:ResolveContainerTags,opts:skip_ad,weight:9999"` // SECLDoc[tags] Definition:`Tags of the container` + Resolved bool `field:"-"` + Runtime string `field:"runtime,handler:ResolveContainerRuntime"` // SECLDoc[runtime] Definition:`Runtime managing the container` } // SecurityProfileContext holds the security context of the profile @@ -330,7 +329,7 @@ type MatchedRule struct { type ActionReport interface { ToJSON() ([]byte, error) IsMatchingRule(ruleID eval.RuleID) bool - IsResolved() 
bool + IsResolved() error } // NewMatchedRule return a new MatchedRule instance diff --git a/pkg/security/secl/model/model_unix.go b/pkg/security/secl/model/model_unix.go index af3714a54f521..caef9bf28ce3b 100644 --- a/pkg/security/secl/model/model_unix.go +++ b/pkg/security/secl/model/model_unix.go @@ -538,6 +538,7 @@ type PTraceEvent struct { Request uint32 `field:"request"` // SECLDoc[request] Definition:`ptrace request` Constants:`Ptrace constants` PID uint32 `field:"-"` + NSPID uint32 `field:"-"` Address uint64 `field:"-"` Tracee *ProcessContext `field:"tracee"` // process context of the tracee } diff --git a/pkg/security/secl/model/unmarshallers_linux.go b/pkg/security/secl/model/unmarshallers_linux.go index 51d4f892ba218..f198adf0bba84 100644 --- a/pkg/security/secl/model/unmarshallers_linux.go +++ b/pkg/security/secl/model/unmarshallers_linux.go @@ -833,14 +833,15 @@ func (e *PTraceEvent) UnmarshalBinary(data []byte) (int, error) { return 0, err } - if len(data)-read < 16 { + if len(data)-read < 20 { return 0, ErrNotEnoughData } e.Request = binary.NativeEndian.Uint32(data[read : read+4]) e.PID = binary.NativeEndian.Uint32(data[read+4 : read+8]) e.Address = binary.NativeEndian.Uint64(data[read+8 : read+16]) - return read + 16, nil + e.NSPID = binary.NativeEndian.Uint32(data[read+16 : read+20]) + return read + 20, nil } // UnmarshalBinary unmarshals a binary representation of itself diff --git a/pkg/security/seclwin/model/model.go b/pkg/security/seclwin/model/model.go index 792098c53c681..9a6ae500f2d6f 100644 --- a/pkg/security/seclwin/model/model.go +++ b/pkg/security/seclwin/model/model.go @@ -72,12 +72,11 @@ func (r *Releasable) AppendReleaseCallback(callback func()) { // ContainerContext holds the container context of an event type ContainerContext struct { Releasable - ContainerID containerutils.ContainerID `field:"id,handler:ResolveContainerID"` // SECLDoc[id] Definition:`ID of the container` - CreatedAt uint64 
`field:"created_at,handler:ResolveContainerCreatedAt"` // SECLDoc[created_at] Definition:`Timestamp of the creation of the container`` - Tags []string `field:"tags,handler:ResolveContainerTags,opts:skip_ad,weight:9999"` // SECLDoc[tags] Definition:`Tags of the container` - TagsResolved bool `field:"-"` - Resolved bool `field:"-"` - Runtime string `field:"runtime,handler:ResolveContainerRuntime"` // SECLDoc[runtime] Definition:`Runtime managing the container` + ContainerID containerutils.ContainerID `field:"id,handler:ResolveContainerID"` // SECLDoc[id] Definition:`ID of the container` + CreatedAt uint64 `field:"created_at,handler:ResolveContainerCreatedAt"` // SECLDoc[created_at] Definition:`Timestamp of the creation of the container`` + Tags []string `field:"tags,handler:ResolveContainerTags,opts:skip_ad,weight:9999"` // SECLDoc[tags] Definition:`Tags of the container` + Resolved bool `field:"-"` + Runtime string `field:"runtime,handler:ResolveContainerRuntime"` // SECLDoc[runtime] Definition:`Runtime managing the container` } // SecurityProfileContext holds the security context of the profile @@ -330,7 +329,7 @@ type MatchedRule struct { type ActionReport interface { ToJSON() ([]byte, error) IsMatchingRule(ruleID eval.RuleID) bool - IsResolved() bool + IsResolved() error } // NewMatchedRule return a new MatchedRule instance diff --git a/pkg/security/security_profile/profile/profile_dir.go b/pkg/security/security_profile/profile/profile_dir.go index 6f0ef53b76569..d9e6d62c71bd7 100644 --- a/pkg/security/security_profile/profile/profile_dir.go +++ b/pkg/security/security_profile/profile/profile_dir.go @@ -48,6 +48,11 @@ type profileFSEntry struct { selector cgroupModel.WorkloadSelector } +type existingProfileInfo struct { + path string + selector string +} + // DirectoryProvider is a ProfileProvider that fetches Security Profiles from the filesystem type DirectoryProvider struct { sync.Mutex @@ -213,24 +218,24 @@ func (dp *DirectoryProvider) listProfiles() 
([]string, error) { return output, nil } -func (dp *DirectoryProvider) loadProfile(profilePath string) error { +func (dp *DirectoryProvider) loadProfile(profilePath string) (*existingProfileInfo, error) { profile, err := LoadProtoFromFile(profilePath) if err != nil { - return fmt.Errorf("couldn't load profile %s: %w", profilePath, err) + return nil, fmt.Errorf("couldn't load profile %s: %w", profilePath, err) } if len(profile.ProfileContexts) == 0 { - return fmt.Errorf("couldn't load profile %s: it did not contains any version", profilePath) + return nil, fmt.Errorf("couldn't load profile %s: it did not contains any version", profilePath) } imageName, imageTag := profile.Selector.GetImageName(), profile.Selector.GetImageTag() if imageTag == "" || imageName == "" { - return fmt.Errorf("couldn't load profile %s: it did not contains any valid image_name (%s) or image_tag (%s)", profilePath, imageName, imageTag) + return nil, fmt.Errorf("couldn't load profile %s: it did not contains any valid image_name (%s) or image_tag (%s)", profilePath, imageName, imageTag) } workloadSelector, err := cgroupModel.NewWorkloadSelector(imageName, imageTag) if err != nil { - return err + return nil, err } profileManagerSelector := workloadSelector profileManagerSelector.Tag = "*" @@ -241,7 +246,7 @@ func (dp *DirectoryProvider) loadProfile(profilePath string) error { // prioritize a persited profile over activity dumps if _, ok := dp.profileMapping[profileManagerSelector]; ok { dp.Unlock() - return fmt.Errorf("ignoring %s: a persisted profile already exists for workload %s", profilePath, profileManagerSelector.String()) + return &existingProfileInfo{path: profilePath, selector: profileManagerSelector.String()}, nil } // update profile mapping @@ -260,7 +265,7 @@ func (dp *DirectoryProvider) loadProfile(profilePath string) error { seclog.Debugf("security profile %s loaded from file system", workloadSelector) if propagateCb == nil { - return nil + return nil, nil } // check if this 
profile matches a workload selector @@ -269,7 +274,7 @@ func (dp *DirectoryProvider) loadProfile(profilePath string) error { propagateCb(workloadSelector, profile) } } - return nil + return nil, nil } func (dp *DirectoryProvider) loadProfiles() error { @@ -279,8 +284,11 @@ func (dp *DirectoryProvider) loadProfiles() error { } for _, profilePath := range files { - if err = dp.loadProfile(profilePath); err != nil { + existingProfile, err := dp.loadProfile(profilePath) + if err != nil { seclog.Errorf("couldn't load profile: %v", err) + } else if existingProfile != nil { + seclog.Debugf("ignoring %s: a persisted profile already exists for workload %s", existingProfile.path, existingProfile.selector) } } return nil @@ -340,13 +348,16 @@ func (dp *DirectoryProvider) onHandleFilesFromWatcher() { var filesToCleanup []string for file := range dp.newFiles { - if err := dp.loadProfile(file); err != nil { + existingProfile, err := dp.loadProfile(file) + if err != nil { if errors.Is(err, cgroupModel.ErrNoImageProvided) { seclog.Debugf("couldn't load new profile %s: %v", file, err) } else { seclog.Warnf("couldn't load new profile %s: %v", file, err) } - + filesToCleanup = append(filesToCleanup, file) + } else if existingProfile != nil { + seclog.Debugf("ignoring %s: a persisted profile already exists for workload %s", existingProfile.path, existingProfile.selector) filesToCleanup = append(filesToCleanup, file) } } diff --git a/pkg/security/serializers/serializers_linux.go b/pkg/security/serializers/serializers_linux.go index 7a409890794e0..07d102105713f 100644 --- a/pkg/security/serializers/serializers_linux.go +++ b/pkg/security/serializers/serializers_linux.go @@ -895,10 +895,24 @@ func newMProtectEventSerializer(e *model.Event) *MProtectEventSerializer { } func newPTraceEventSerializer(e *model.Event) *PTraceEventSerializer { + if e.PTrace.Tracee == nil { + return nil + } + + fakeTraceeEvent := &model.Event{ + BaseEvent: model.BaseEvent{ + FieldHandlers: e.FieldHandlers, + 
ProcessContext: e.PTrace.Tracee, + ContainerContext: &model.ContainerContext{ + ContainerID: e.PTrace.Tracee.ContainerID, + }, + }, + } + return &PTraceEventSerializer{ Request: model.PTraceRequest(e.PTrace.Request).String(), Address: fmt.Sprintf("0x%x", e.PTrace.Address), - Tracee: newProcessContextSerializer(e.PTrace.Tracee, e), + Tracee: newProcessContextSerializer(e.PTrace.Tracee, fakeTraceeEvent), } } diff --git a/pkg/security/tests/fake_tags_resolver.go b/pkg/security/tests/fake_tags_resolver.go index 02bd896865298..a8b1694349a3c 100644 --- a/pkg/security/tests/fake_tags_resolver.go +++ b/pkg/security/tests/fake_tags_resolver.go @@ -53,6 +53,11 @@ func (fr *FakeTagger) Tag(entity types.EntityID, _ types.TagCardinality) ([]stri return append(fakeTags, fmt.Sprintf("image_name:fake_ubuntu_%d", len(fr.containerIDs))), nil } +// GlobalTags returns the global tags +func (fr *FakeTagger) GlobalTags(_ types.TagCardinality) ([]string, error) { + return nil, nil +} + // NewFakeTaggerDifferentImageNames returns a new tagger func NewFakeTaggerDifferentImageNames() tags.Tagger { return &FakeTagger{} @@ -78,6 +83,11 @@ func (fmr *FakeMonoTagger) Tag(entity types.EntityID, _ types.TagCardinality) ([ return []string{"container_id:" + entity.GetID(), "image_name:fake_ubuntu", "image_tag:latest"}, nil } +// GlobalTags returns the global tags +func (fmr *FakeMonoTagger) GlobalTags(_ types.TagCardinality) ([]string, error) { + return nil, nil +} + // NewFakeMonoTagger returns a new tags tagger func NewFakeMonoTagger() tags.Tagger { return &FakeMonoTagger{} @@ -150,6 +160,11 @@ func (fmr *FakeManualTagger) Tag(entity types.EntityID, _ types.TagCardinality) return []string{"container_id:" + containerID, "image_name:" + selector.Image, "image_tag:" + selector.Tag}, nil } +// GlobalTags returns the global tags +func (fmr *FakeManualTagger) GlobalTags(_ types.TagCardinality) ([]string, error) { + return nil, nil +} + // NewFakeManualTagger returns a new tagger func 
NewFakeManualTagger() *FakeManualTagger { return &FakeManualTagger{ diff --git a/pkg/security/tests/module_tester_linux.go b/pkg/security/tests/module_tester_linux.go index c790771be8c29..268fc839f625e 100644 --- a/pkg/security/tests/module_tester_linux.go +++ b/pkg/security/tests/module_tester_linux.go @@ -741,7 +741,7 @@ func newTestModuleWithOnDemandProbes(t testing.TB, onDemandHooks []rules.OnDeman emopts.ProbeOpts.DontDiscardRuntime = false } - testMod.eventMonitor, err = eventmonitor.NewEventMonitor(emconfig, secconfig, emopts, nil) + testMod.eventMonitor, err = eventmonitor.NewEventMonitor(emconfig, secconfig, emopts) if err != nil { return nil, err } diff --git a/pkg/security/tests/module_tester_windows.go b/pkg/security/tests/module_tester_windows.go index 0d68f2aed4023..25876dc813692 100644 --- a/pkg/security/tests/module_tester_windows.go +++ b/pkg/security/tests/module_tester_windows.go @@ -163,7 +163,7 @@ func newTestModule(t testing.TB, macroDefs []*rules.MacroDefinition, ruleDefs [] emopts.ProbeOpts.Tagger = NewFakeTaggerDifferentImageNames() } - testMod.eventMonitor, err = eventmonitor.NewEventMonitor(emconfig, secconfig, emopts, nil) + testMod.eventMonitor, err = eventmonitor.NewEventMonitor(emconfig, secconfig, emopts) if err != nil { return nil, err } diff --git a/pkg/security/tests/ptrace_test.go b/pkg/security/tests/ptrace_test.go index 1052a9791555d..f6f660422ae46 100644 --- a/pkg/security/tests/ptrace_test.go +++ b/pkg/security/tests/ptrace_test.go @@ -12,6 +12,7 @@ import ( "fmt" "os/exec" "testing" + "time" "github.com/stretchr/testify/assert" @@ -24,9 +25,17 @@ func TestPTraceEvent(t *testing.T) { ruleDefs := []*rules.RuleDefinition{ { - ID: "test_ptrace", + ID: "test_ptrace_cont", Expression: `ptrace.request == PTRACE_CONT && ptrace.tracee.file.name == "syscall_tester"`, }, + { + ID: "test_ptrace_me", + Expression: `ptrace.request == PTRACE_TRACEME && process.file.name == "syscall_tester"`, + }, + { + ID: "test_ptrace_attach", + 
Expression: `ptrace.request == PTRACE_ATTACH && ptrace.tracee.file.name == "syscall_tester"`, + }, } test, err := newTestModule(t, nil, ruleDefs) @@ -40,18 +49,18 @@ func TestPTraceEvent(t *testing.T) { t.Fatal(err) } - test.Run(t, "ptrace", func(t *testing.T, _ wrapperType, cmdFunc func(cmd string, args []string, envs []string) *exec.Cmd) { + test.Run(t, "ptrace-cont", func(t *testing.T, _ wrapperType, cmdFunc func(cmd string, args []string, envs []string) *exec.Cmd) { args := []string{"ptrace-traceme"} envs := []string{} - test.WaitSignal(t, func() error { + err := test.GetEventSent(t, func() error { cmd := cmdFunc(syscallTester, args, envs) if out, err := cmd.CombinedOutput(); err != nil { return fmt.Errorf("%s: %w", out, err) } return nil - }, func(event *model.Event, _ *rules.Rule) { + }, func(_ *rules.Rule, event *model.Event) bool { assert.Equal(t, "ptrace", event.GetType(), "wrong event type") assert.Equal(t, uint64(42), event.PTrace.Address, "wrong address") @@ -59,6 +68,63 @@ func TestPTraceEvent(t *testing.T) { assert.Equal(t, value.(bool), false) test.validatePTraceSchema(t, event) - }) + return true + }, time.Second*3, "test_ptrace_cont") + if err != nil { + t.Error(err) + } + }) + + test.Run(t, "ptrace-me", func(t *testing.T, _ wrapperType, cmdFunc func(cmd string, args []string, envs []string) *exec.Cmd) { + args := []string{"ptrace-traceme"} + envs := []string{} + + err := test.GetEventSent(t, func() error { + cmd := cmdFunc(syscallTester, args, envs) + if out, err := cmd.CombinedOutput(); err != nil { + return fmt.Errorf("%s: %w", out, err) + } + + return nil + }, func(_ *rules.Rule, event *model.Event) bool { + assert.Equal(t, "ptrace", event.GetType(), "wrong event type") + assert.Equal(t, uint64(0), event.PTrace.Address, "wrong address") + + value, _ := event.GetFieldValue("event.async") + assert.Equal(t, value.(bool), false) + + test.validatePTraceSchema(t, event) + return true + }, time.Second*3, "test_ptrace_me") + if err != nil { + 
t.Error(err) + } + }) + + test.Run(t, "ptrace-attach", func(t *testing.T, _ wrapperType, cmdFunc func(cmd string, args []string, envs []string) *exec.Cmd) { + args := []string{"ptrace-attach"} + envs := []string{} + + err := test.GetEventSent(t, func() error { + cmd := cmdFunc(syscallTester, args, envs) + if out, err := cmd.CombinedOutput(); err != nil { + return fmt.Errorf("%s: %w", out, err) + } + + return nil + }, func(_ *rules.Rule, event *model.Event) bool { + assert.Equal(t, "ptrace", event.GetType(), "wrong event type") + assert.Equal(t, uint64(0), event.PTrace.Address, "wrong address") + assert.Equal(t, event.PTrace.Tracee.PPid, event.PTrace.Tracee.Parent.Pid, "tracee wrong ppid / parent pid") + + value, _ := event.GetFieldValue("event.async") + assert.Equal(t, value.(bool), false) + + test.validatePTraceSchema(t, event) + return true + }, time.Second*3, "test_ptrace_attach") + if err != nil { + t.Error(err) + } }) } diff --git a/pkg/security/tests/syscall_tester/c/syscall_tester.c b/pkg/security/tests/syscall_tester/c/syscall_tester.c index 591bb2e6f8e81..76844cf222e5c 100644 --- a/pkg/security/tests/syscall_tester/c/syscall_tester.c +++ b/pkg/security/tests/syscall_tester/c/syscall_tester.c @@ -185,6 +185,19 @@ int ptrace_traceme() { return EXIT_SUCCESS; } +int ptrace_attach() { + int child = fork(); + if (child == 0) { + for (int i = 0; i < 20; i++) { + sleep(1); + } + } else { + ptrace(PTRACE_ATTACH, child, 0, NULL); + wait(NULL); + } + return EXIT_SUCCESS; +} + int test_signal_sigusr(int child, int sig) { int do_fork = child == 0; if (do_fork) { @@ -885,6 +898,8 @@ int main(int argc, char **argv) { exit_code = span_exec(sub_argc, sub_argv); } else if (strcmp(cmd, "ptrace-traceme") == 0) { exit_code = ptrace_traceme(); + } else if (strcmp(cmd, "ptrace-attach") == 0) { + exit_code = ptrace_attach(); } else if (strcmp(cmd, "span-open") == 0) { exit_code = span_open(sub_argc, sub_argv); } else if (strcmp(cmd, "pipe-chown") == 0) { diff --git 
a/pkg/security/tests/trace_pipe.go b/pkg/security/tests/trace_pipe.go index 973c03f1023ea..424968b2e3aaa 100644 --- a/pkg/security/tests/trace_pipe.go +++ b/pkg/security/tests/trace_pipe.go @@ -94,6 +94,9 @@ func (t *TracePipe) ReadLine() (*TraceEvent, error) { if err != nil { return nil, err } + if line == "\n" { + return nil, io.EOF + } traceEvent, err := parseTraceLine(line) if err != nil { return nil, err diff --git a/pkg/security/utils/proc_linux.go b/pkg/security/utils/proc_linux.go index 241aae6f91b3f..9fcbbde812ca7 100644 --- a/pkg/security/utils/proc_linux.go +++ b/pkg/security/utils/proc_linux.go @@ -8,6 +8,7 @@ package utils import ( "bufio" + "errors" "fmt" "io" "os" @@ -20,6 +21,7 @@ import ( "github.com/DataDog/datadog-agent/pkg/security/secl/model" "github.com/DataDog/datadog-agent/pkg/util/kernel" + "github.com/shirou/gopsutil/v3/process" ) // Getpid returns the current process ID in the host namespace @@ -384,3 +386,85 @@ func FetchLoadedModules() (map[string]ProcFSModule, error) { return output, nil } + +// GetProcessPidNamespace returns the PID namespace of the given PID +func GetProcessPidNamespace(pid uint32) (uint64, error) { + nspidPath := procPidPath(pid, "ns/pid") + link, err := os.Readlink(nspidPath) + if err != nil { + return 0, err + } + // link should be in for of: pid:[4026532294] + if !strings.HasPrefix(link, "pid:[") { + return 0, fmt.Errorf("Failed to retrieve PID NS, pid ns malformated: (%s) err: %v", link, err) + } + + link = strings.TrimPrefix(link, "pid:[") + link = strings.TrimSuffix(link, "]") + + ns, err := strconv.ParseUint(link, 10, 64) + if err != nil { + return 0, fmt.Errorf("Failed to retrieve PID NS, pid ns malformated: (%s) err: %v", link, err) + } + return ns, nil +} + +// GetNsPids returns the namespaced pids of the the givent root pid +func GetNsPids(pid uint32) ([]uint32, error) { + statusFile := StatusPath(pid) + content, err := os.ReadFile(statusFile) + if err != nil { + return nil, fmt.Errorf("failed to read 
status file: %w", err) + } + + lines := strings.Split(string(content), "\n") + for _, line := range lines { + if strings.HasPrefix(line, "NSpid:") { + // Remove "NSpid:" prefix and trim spaces + values := strings.TrimPrefix(line, "NSpid:") + values = strings.TrimSpace(values) + + // Split the remaining string into fields + fields := strings.Fields(values) + + // Convert string values to integers + nspids := make([]uint32, 0, len(fields)) + for _, field := range fields { + val, err := strconv.ParseUint(field, 10, 64) + if err != nil { + return nil, fmt.Errorf("failed to parse NSpid value: %w", err) + } + nspids = append(nspids, uint32(val)) + } + return nspids, nil + } + } + return nil, fmt.Errorf("NSpid field not found") +} + +// FindPidNamespace search and return the host PID for the given namespaced PID + its namespace +func FindPidNamespace(nspid uint32, ns uint64) (uint32, error) { + procPids, err := process.Pids() + if err != nil { + return 0, err + } + + for _, procPid := range procPids { + procNs, err := GetProcessPidNamespace(uint32(procPid)) + if err != nil { + continue + } + + if procNs == ns { + nspids, err := GetNsPids(uint32(procPid)) + if err != nil { + return 0, err + } + // we look only at the last one, as it the most inner one and corresponding to its /proc/pid/ns/pid namespace + if nspids[len(nspids)-1] == nspid { + return uint32(procPid), nil + } + } + } + return 0, errors.New("PID not found") +} diff --git a/pkg/util/crio/crio_util.go b/pkg/util/crio/crio_util.go index f1b200fa658f6..f5d670690a2c3 100644 --- a/pkg/util/crio/crio_util.go +++ b/pkg/util/crio/crio_util.go @@ -8,7 +8,11 @@ package crio import ( "context" + "encoding/json" "fmt" + "io" + "os" + "path/filepath" "time" "google.golang.org/grpc" @@ -16,19 +20,24 @@ import ( "google.golang.org/grpc/credentials/insecure" v1 "k8s.io/cri-api/pkg/apis/runtime/v1" + workloadmeta "github.com/DataDog/datadog-agent/comp/core/workloadmeta/def" + "github.com/DataDog/datadog-agent/pkg/config/env" + 
pkgconfigsetup "github.com/DataDog/datadog-agent/pkg/config/setup" + containersimage "github.com/DataDog/datadog-agent/pkg/util/containers/image" "github.com/DataDog/datadog-agent/pkg/util/retry" ) const ( - udsPrefix = "unix://%s" + defaultCrioSocketPath = "/var/run/crio/crio.sock" + udsPrefix = "unix://%s" + overlayPath = "/var/lib/containers/storage/overlay" + overlayImagePath = "/var/lib/containers/storage/overlay-images" + overlayLayersPath = "/var/lib/containers/storage/overlay-layers/layers.json" ) // Client defines an interface for interacting with the CRI-API, providing methods for // retrieving information about container and pod statuses, images, and metadata. type Client interface { - // Close terminates the CRI-O API connection and cleans up resources. - Close() error - // RuntimeMetadata returns metadata about the container runtime, including version details. // Accepts a context to manage request lifetime. RuntimeMetadata(ctx context.Context) (*v1.VersionResponse, error) @@ -42,12 +51,16 @@ type Client interface { GetContainerStatus(ctx context.Context, containerID string) (*v1.ContainerStatusResponse, error) // GetContainerImage fetches metadata for a specified image, identified by imageSpec. - // Accepts a context and the imageSpec to identify the image. - GetContainerImage(ctx context.Context, imageSpec *v1.ImageSpec) (*v1.Image, error) + // Accepts a context, the imageSpec to identify the image, and a verbose flag for detailed metadata. + GetContainerImage(ctx context.Context, imageSpec *v1.ImageSpec, verbose bool) (*v1.ImageStatusResponse, error) // GetPodStatus provides the status of a specified pod sandbox, identified by podSandboxID. // Takes a context to manage the request and returns sandbox status information. 
GetPodStatus(ctx context.Context, podSandboxID string) (*v1.PodSandboxStatus, error) + + // GetCRIOImageLayers returns paths to `diff` directories for each layer of the specified image, + // using imgMeta to identify the image and resolve its layers. + GetCRIOImageLayers(imgMeta *workloadmeta.ContainerImageMetadata) ([]string, error) } // clientImpl is a client to interact with the CRI-API. @@ -60,8 +73,8 @@ type clientImpl struct { } // NewCRIOClient creates a new CRI-O client implementing the Client interface. -func NewCRIOClient(socketPath string) (Client, error) { - +func NewCRIOClient() (Client, error) { + socketPath := getCRIOSocketPath() client := &clientImpl{socketPath: socketPath} client.initRetry.SetupRetrier(&retry.Config{ //nolint:errcheck @@ -74,20 +87,12 @@ func NewCRIOClient(socketPath string) (Client, error) { // Attempt connection with retry if err := client.initRetry.TriggerRetry(); err != nil { - return nil, fmt.Errorf("failed to initialize CRI-O client: %w", err) + return nil, fmt.Errorf("failed to initialize CRI-O client on socket %s: %w", socketPath, err) } return client, nil } -// Close closes the CRI-O client connection. -func (c *clientImpl) Close() error { - if c == nil || c.conn == nil { - return fmt.Errorf("CRI-O client is not initialized") - } - return c.conn.Close() -} - // RuntimeMetadata retrieves the runtime metadata including runtime name and version. func (c *clientImpl) RuntimeMetadata(ctx context.Context) (*v1.VersionResponse, error) { return c.runtimeClient.Version(ctx, &v1.VersionRequest{}) @@ -112,15 +117,15 @@ func (c *clientImpl) GetContainerStatus(ctx context.Context, containerID string) } // GetContainerImage retrieves the image status of a specific imageSpec. 
-func (c *clientImpl) GetContainerImage(ctx context.Context, imageSpec *v1.ImageSpec) (*v1.Image, error) { - imageStatusResponse, err := c.imageClient.ImageStatus(ctx, &v1.ImageStatusRequest{Image: imageSpec}) +func (c *clientImpl) GetContainerImage(ctx context.Context, imageSpec *v1.ImageSpec, verbose bool) (*v1.ImageStatusResponse, error) { + imageStatusResponse, err := c.imageClient.ImageStatus(ctx, &v1.ImageStatusRequest{Image: imageSpec, Verbose: verbose}) if err != nil { return nil, fmt.Errorf("failed to fetch image status for spec %s: %w", imageSpec.Image, err) } - if imageStatusResponse.GetImage() == nil { + if imageStatusResponse.Image == nil { return nil, fmt.Errorf("image not found for spec %s", imageSpec.Image) } - return imageStatusResponse.GetImage(), nil + return imageStatusResponse, nil } // GetPodStatus retrieves the status of a specific pod sandbox. @@ -129,7 +134,66 @@ func (c *clientImpl) GetPodStatus(ctx context.Context, podSandboxID string) (*v1 if err != nil { return nil, fmt.Errorf("failed to get pod status for pod ID %s: %w", podSandboxID, err) } - return podSandboxStatusResponse.GetStatus(), nil + return podSandboxStatusResponse.Status, nil +} + +// GetCRIOImageLayers returns the paths of each layer's `diff` directory in the correct order. 
+func (c *clientImpl) GetCRIOImageLayers(imgMeta *workloadmeta.ContainerImageMetadata) ([]string, error) { + var lowerDirs []string + + digestToIDMap, err := c.buildDigestToIDMap(imgMeta) + if err != nil { + return nil, fmt.Errorf("failed to build digest to ID map: %w", err) + } + + // Construct the list of lowerDirs by mapping each layer to its corresponding `diff` directory path + for _, layer := range imgMeta.Layers { + if layer.Digest == "" { // Skip empty layers + continue + } + layerID, found := digestToIDMap[layer.Digest] + if !found { + return nil, fmt.Errorf("layer ID not found for digest %s", layer.Digest) + } + + layerPath := filepath.Join(GetOverlayPath(), layerID, "diff") + lowerDirs = append([]string{layerPath}, lowerDirs...) + } + + return lowerDirs, nil +} + +// GetOverlayImagePath returns the path to the overlay-images directory. +func GetOverlayImagePath() string { + if env.IsContainerized() { + return containersimage.SanitizeHostPath(overlayImagePath) + } + return overlayImagePath +} + +// GetOverlayPath returns the path to the overlay directory. +func GetOverlayPath() string { + if env.IsContainerized() { + return containersimage.SanitizeHostPath(overlayPath) + } + return overlayPath +} + +// GetOverlayLayersPath returns the path to the overlay-layers directory. +func GetOverlayLayersPath() string { + if env.IsContainerized() { + return containersimage.SanitizeHostPath(overlayLayersPath) + } + return overlayLayersPath +} + +// getCRIOSocketPath returns the configured CRI-O socket path or the default path. +func getCRIOSocketPath() string { + criSocket := pkgconfigsetup.Datadog().GetString("cri_socket_path") + if criSocket == "" { + return defaultCrioSocketPath + } + return criSocket } // connect establishes a gRPC connection. @@ -160,3 +224,44 @@ func (c *clientImpl) connect() error { return nil } + +// buildDigestToIDMap creates a map of layer digests to IDs for the layers in imgMeta. 
+func (c *clientImpl) buildDigestToIDMap(imgMeta *workloadmeta.ContainerImageMetadata) (map[string]string, error) { + file, err := os.Open(GetOverlayLayersPath()) + if err != nil { + return nil, fmt.Errorf("failed to open layers.json: %w", err) + } + defer file.Close() + + fileBytes, err := io.ReadAll(file) + if err != nil { + return nil, fmt.Errorf("failed to read layers.json: %w", err) + } + + var layers []layerInfo + if err := json.Unmarshal(fileBytes, &layers); err != nil { + return nil, fmt.Errorf("failed to parse layers.json: %w", err) + } + + neededDigests := make(map[string]struct{}) + for _, layer := range imgMeta.Layers { + if layer.Digest != "" { // Skip empty layers + neededDigests[layer.Digest] = struct{}{} + } + } + + digestToIDMap := make(map[string]string) + for _, layer := range layers { + if _, found := neededDigests[layer.DiffDigest]; found { + digestToIDMap[layer.DiffDigest] = layer.ID + } + } + + return digestToIDMap, nil +} + +// layerInfo represents each entry in layers.json +type layerInfo struct { + ID string `json:"id"` + DiffDigest string `json:"diff-digest"` +} diff --git a/pkg/util/static_tags.go b/pkg/util/static_tags.go index f0591d0243f9d..f6173daa9e450 100644 --- a/pkg/util/static_tags.go +++ b/pkg/util/static_tags.go @@ -10,9 +10,11 @@ import ( "strings" "github.com/DataDog/datadog-agent/pkg/config/env" + "github.com/DataDog/datadog-agent/pkg/config/model" pkgconfigsetup "github.com/DataDog/datadog-agent/pkg/config/setup" configUtils "github.com/DataDog/datadog-agent/pkg/config/utils" "github.com/DataDog/datadog-agent/pkg/util/fargate" + "github.com/DataDog/datadog-agent/pkg/util/flavor" "github.com/DataDog/datadog-agent/pkg/util/kubernetes/clustername" "github.com/DataDog/datadog-agent/pkg/util/log" ) @@ -70,19 +72,45 @@ func GetStaticTagsSlice(ctx context.Context) []string { return tags } -// GetStaticTags is similar to GetStaticTagsSlice, but returning a map[string]string containing +// GetStaticTags is similar to 
GetStaticTagsSlice, but returning a map[string][]string containing // : pairs for tags. Tags not matching this pattern are omitted. -func GetStaticTags(ctx context.Context) map[string]string { +func GetStaticTags(ctx context.Context) map[string][]string { tags := GetStaticTagsSlice(ctx) if tags == nil { return nil } + return sliceToMap(tags) +} + +// GetGlobalEnvTags is similar to GetStaticTags, but returning a map[string][]string containing +// : pairs for all global environment tags on the cluster agent. This includes: +// DD_TAGS, DD_EXTRA_TAGS, DD_CLUSTER_CHECKS_EXTRA_TAGS, and DD_ORCHESTRATOR_EXPLORER_EXTRA_TAGS +func GetGlobalEnvTags(config model.Reader) map[string][]string { + if flavor.GetFlavor() != flavor.ClusterAgent { + return nil + } + + // DD_TAGS / DD_EXTRA_TAGS + tags := configUtils.GetConfiguredTags(config, false) + + // DD_CLUSTER_CHECKS_EXTRA_TAGS / DD_ORCHESTRATOR_EXPLORER_EXTRA_TAGS + tags = append(tags, configUtils.GetConfiguredDCATags(config)...) - rv := make(map[string]string, len(tags)) + if tags == nil { + return nil + } + return sliceToMap(tags) +} + +func sliceToMap(tags []string) map[string][]string { + rv := make(map[string][]string, len(tags)) for _, t := range tags { tagParts := strings.SplitN(t, ":", 2) if len(tagParts) == 2 { - rv[tagParts[0]] = tagParts[1] + if _, ok := rv[tagParts[0]]; !ok { + rv[tagParts[0]] = []string{} + } + rv[tagParts[0]] = append(rv[tagParts[0]], tagParts[1]) } } return rv diff --git a/pkg/util/static_tags_test.go b/pkg/util/static_tags_test.go index e9d9de3374ad8..013e79689fe94 100644 --- a/pkg/util/static_tags_test.go +++ b/pkg/util/static_tags_test.go @@ -13,6 +13,7 @@ import ( "github.com/DataDog/datadog-agent/pkg/config/env" configmock "github.com/DataDog/datadog-agent/pkg/config/mock" + "github.com/DataDog/datadog-agent/pkg/util/flavor" ) func TestStaticTags(t *testing.T) { @@ -26,10 +27,10 @@ func TestStaticTags(t *testing.T) { mockConfig.SetWithoutSource("tags", []string{"some:tag", "another:tag", 
"nocolon"}) defer mockConfig.SetWithoutSource("tags", []string{}) staticTags := GetStaticTags(context.Background()) - assert.Equal(t, map[string]string{ - "some": "tag", - "another": "tag", - "eks_fargate_node": "eksnode", + assert.Equal(t, map[string][]string{ + "some": {"tag"}, + "another": {"tag"}, + "eks_fargate_node": {"eksnode"}, }, staticTags) }) @@ -39,10 +40,10 @@ func TestStaticTags(t *testing.T) { defer mockConfig.SetWithoutSource("tags", []string{}) defer mockConfig.SetWithoutSource("extra_tags", []string{}) staticTags := GetStaticTags(context.Background()) - assert.Equal(t, map[string]string{ - "some": "tag", - "extra": "tag", - "eks_fargate_node": "eksnode", + assert.Equal(t, map[string][]string{ + "some": {"tag"}, + "extra": {"tag"}, + "eks_fargate_node": {"eksnode"}, }, staticTags) }) @@ -50,9 +51,9 @@ func TestStaticTags(t *testing.T) { mockConfig.SetWithoutSource("tags", []string{"kube_cluster_name:foo"}) defer mockConfig.SetWithoutSource("tags", []string{}) staticTags := GetStaticTags(context.Background()) - assert.Equal(t, map[string]string{ - "eks_fargate_node": "eksnode", - "kube_cluster_name": "foo", + assert.Equal(t, map[string][]string{ + "eks_fargate_node": {"eksnode"}, + "kube_cluster_name": {"foo"}, }, staticTags) }) } @@ -91,3 +92,33 @@ func TestStaticTagsSlice(t *testing.T) { }, staticTags) }) } + +func TestExtraGlobalEnvTags(t *testing.T) { + mockConfig := configmock.New(t) + mockConfig.SetWithoutSource("tags", []string{"some:tag", "nocolon"}) + mockConfig.SetWithoutSource("extra_tags", []string{"extra:tag", "missingcolon"}) + mockConfig.SetWithoutSource("cluster_checks.extra_tags", []string{"cluster:tag", "nocolon"}) + mockConfig.SetWithoutSource("orchestrator_explorer.extra_tags", []string{"orch:tag", "missingcolon"}) + + recordFlavor := flavor.GetFlavor() + defer func() { + flavor.SetFlavor(recordFlavor) + }() + + t.Run("Agent extraGlobalTags", func(t *testing.T) { + flavor.SetFlavor(flavor.DefaultAgent) + globalTags := 
GetGlobalEnvTags(mockConfig) + assert.Equal(t, map[string][]string(nil), globalTags) + }) + + t.Run("ClusterAgent extraGlobalTags", func(t *testing.T) { + flavor.SetFlavor(flavor.ClusterAgent) + globalTags := GetGlobalEnvTags(mockConfig) + assert.Equal(t, map[string][]string{ + "some": {"tag"}, + "extra": {"tag"}, + "cluster": {"tag"}, + "orch": {"tag"}, + }, globalTags) + }) +} diff --git a/pkg/util/trivy/trivy.go b/pkg/util/trivy/trivy.go index e39d387b06276..07a15d58d93ac 100644 --- a/pkg/util/trivy/trivy.go +++ b/pkg/util/trivy/trivy.go @@ -30,6 +30,7 @@ import ( "github.com/DataDog/datadog-agent/pkg/sbom" cutil "github.com/DataDog/datadog-agent/pkg/util/containerd" containersimage "github.com/DataDog/datadog-agent/pkg/util/containers/image" + "github.com/DataDog/datadog-agent/pkg/util/crio" "github.com/DataDog/datadog-agent/pkg/util/log" "github.com/DataDog/datadog-agent/pkg/util/optional" @@ -404,6 +405,22 @@ func (c *Collector) ScanContainerdImageFromFilesystem(ctx context.Context, imgMe return c.scanFilesystem(ctx, os.DirFS("/"), imagePath, imgMeta, scanOptions) } +// ScanCRIOImageFromOverlayFS scans the CRI-O image layers using OverlayFS. +func (c *Collector) ScanCRIOImageFromOverlayFS(ctx context.Context, imgMeta *workloadmeta.ContainerImageMetadata, client crio.Client, scanOptions sbom.ScanOptions) (sbom.Report, error) { + lowerDirs, err := client.GetCRIOImageLayers(imgMeta) + if err != nil { + return nil, fmt.Errorf("failed to retrieve layer directories: %w", err) + } + + report, err := c.scanOverlayFS(ctx, lowerDirs, imgMeta, scanOptions) + if err != nil { + return nil, err + } + + return report, nil +} + +// scanFilesystem scans the specified directory and logs detailed scan steps. 
func (c *Collector) scanFilesystem(ctx context.Context, fsys fs.FS, path string, imgMeta *workloadmeta.ContainerImageMetadata, scanOptions sbom.ScanOptions) (sbom.Report, error) { // For filesystem scans, it is required to walk the filesystem to get the persistentCache key so caching does not add any value. // TODO: Cache directly the trivy report for container images diff --git a/release.json b/release.json index 51be2bf1b6144..6ba0cdd4d879f 100644 --- a/release.json +++ b/release.json @@ -7,7 +7,7 @@ }, "nightly": { "INTEGRATIONS_CORE_VERSION": "master", - "OMNIBUS_SOFTWARE_VERSION": "58b335c2c49efc266e9e707d9a2a36198ff8f1a3", + "OMNIBUS_SOFTWARE_VERSION": "0059a287d5543305c01a098740ba328aef7fa8ff", "OMNIBUS_RUBY_VERSION": "d365e483ee05a13e55eeb5208d11452c5b65afbb", "JMXFETCH_VERSION": "0.49.6", "JMXFETCH_HASH": "f06bdac1f8ec41daf9b9839ac883f1865a068b04810ea82197b8a6afb9369cb9", @@ -26,7 +26,7 @@ }, "nightly-a7": { "INTEGRATIONS_CORE_VERSION": "master", - "OMNIBUS_SOFTWARE_VERSION": "58b335c2c49efc266e9e707d9a2a36198ff8f1a3", + "OMNIBUS_SOFTWARE_VERSION": "0059a287d5543305c01a098740ba328aef7fa8ff", "OMNIBUS_RUBY_VERSION": "d365e483ee05a13e55eeb5208d11452c5b65afbb", "JMXFETCH_VERSION": "0.49.6", "JMXFETCH_HASH": "f06bdac1f8ec41daf9b9839ac883f1865a068b04810ea82197b8a6afb9369cb9", diff --git a/releasenotes/notes/NDMII-3154-ha-agent-collector-worker-22f3972469c669c3.yaml b/releasenotes/notes/NDMII-3154-ha-agent-collector-worker-22f3972469c669c3.yaml new file mode 100644 index 0000000000000..fa04e9cd53682 --- /dev/null +++ b/releasenotes/notes/NDMII-3154-ha-agent-collector-worker-22f3972469c669c3.yaml @@ -0,0 +1,11 @@ +# Each section from every release note are combined when the +# CHANGELOG.rst is rendered. So the text needs to be worded so that +# it does not depend on any information only available in another +# section. This may mean repeating some details, but each section +# must be readable independently of the other. 
+# +# Each section note must be formatted as reStructuredText. +--- +enhancements: + - | + [ha-agent] Run HA enabled integrations only on leader Agent diff --git a/releasenotes/notes/agent-telemetry-prom2dd-counter-5cde7684d71e8a6d.yaml b/releasenotes/notes/agent-telemetry-prom2dd-counter-5cde7684d71e8a6d.yaml new file mode 100644 index 0000000000000..1bf82635af8b0 --- /dev/null +++ b/releasenotes/notes/agent-telemetry-prom2dd-counter-5cde7684d71e8a6d.yaml @@ -0,0 +1,13 @@ +# Each section from every release note are combined when the +# CHANGELOG.rst is rendered. So the text needs to be worded so that +# it does not depend on any information only available in another +# section. This may mean repeating some details, but each section +# must be readable independently of the other. +# +# Each section note must be formatted as reStructuredText. +--- +enhancements: + - | + Convert Prometheus style Counters and Histograms used in Agent telemetry + from monotonically increasing to non-monotonic values (reset on each scrape). + In addition de-accumulate Prometheus Histogram bucket values on each scrape. diff --git a/releasenotes/notes/crio-collector-image-metadata-d721b1e797e82770.yaml b/releasenotes/notes/crio-collector-image-metadata-d721b1e797e82770.yaml new file mode 100644 index 0000000000000..e77e8815cbded --- /dev/null +++ b/releasenotes/notes/crio-collector-image-metadata-d721b1e797e82770.yaml @@ -0,0 +1,11 @@ +# Each section from every release note are combined when the +# CHANGELOG.rst is rendered. So the text needs to be worded so that +# it does not depend on any information only available in another +# section. This may mean repeating some details, but each section +# must be readable independently of the other. +# +# Each section note must be formatted as reStructuredText. +--- +features: + - | + Added support for collecting container image metadata when running on a CRI-O runtime. 
diff --git a/releasenotes/notes/enable-gotls-by-default-f9c4fe517d075bcc.yaml b/releasenotes/notes/enable-gotls-by-default-f9c4fe517d075bcc.yaml new file mode 100644 index 0000000000000..87c5929483ed0 --- /dev/null +++ b/releasenotes/notes/enable-gotls-by-default-f9c4fe517d075bcc.yaml @@ -0,0 +1,6 @@ +--- +features: + - | + USM now monitors TLS traffic encrypted with Go TLS by default. + To disable this feature, set the `service_monitoring_config.tls.go.enabled` + configuration option to false. diff --git a/releasenotes/notes/enable-istio-tls-by-default-1da65945911f278a.yaml b/releasenotes/notes/enable-istio-tls-by-default-1da65945911f278a.yaml new file mode 100644 index 0000000000000..88417827a2524 --- /dev/null +++ b/releasenotes/notes/enable-istio-tls-by-default-1da65945911f278a.yaml @@ -0,0 +1,5 @@ +--- +features: + - | + USM now monitors traffic encrypted with Istio mTLS by default. + To disable this feature, set the `service_monitoring_config.tls.istio.enabled` configuration option to false. diff --git a/releasenotes/notes/network-path-initial-windows-support-2a51f80234a2b0a1.yaml b/releasenotes/notes/network-path-initial-windows-support-2a51f80234a2b0a1.yaml new file mode 100644 index 0000000000000..1ff9197162c8c --- /dev/null +++ b/releasenotes/notes/network-path-initial-windows-support-2a51f80234a2b0a1.yaml @@ -0,0 +1,11 @@ +# Each section from every release note are combined when the +# CHANGELOG.rst is rendered. So the text needs to be worded so that +# it does not depend on any information only available in another +# section. This may mean repeating some details, but each section +# must be readable independently of the other. +# +# Each section note must be formatted as reStructuredText. +--- +enhancements: + - | + Adds initial Windows support for TCP probes in Network Path. 
diff --git a/releasenotes/notes/unify-cluster-check-tag-0a8e854517742c0f.yaml b/releasenotes/notes/unify-cluster-check-tag-0a8e854517742c0f.yaml new file mode 100644 index 0000000000000..5da73bd832bcf --- /dev/null +++ b/releasenotes/notes/unify-cluster-check-tag-0a8e854517742c0f.yaml @@ -0,0 +1,11 @@ +# Each section from every release note are combined when the +# CHANGELOG.rst is rendered. So the text needs to be worded so that +# it does not depend on any information only available in another +# section. This may mean repeating some details, but each section +# must be readable independently of the other. +# +# Each section note must be formatted as reStructuredText. +--- +enhancements: + - | + Standardized cluster check tagging across all environments, allowing DD_TAGS, DD_EXTRA_TAGS, DD_CLUSTER_CHECKS_EXTRA_TAGS, and DD_ORCHESTRATOR_EXPLORER_EXTRA_TAGS to apply to all cluster check data when operating on the Cluster Agent, Node Agent, or Cluster Checks Runner. diff --git a/tasks/agent.py b/tasks/agent.py index 3399e4d7c4a58..190c7b04c0916 100644 --- a/tasks/agent.py +++ b/tasks/agent.py @@ -14,7 +14,7 @@ from invoke import task from invoke.exceptions import Exit -from tasks.build_tags import filter_incompatible_tags, get_build_tags, get_default_build_tags +from tasks.build_tags import add_fips_tags, filter_incompatible_tags, get_build_tags, get_default_build_tags from tasks.devcontainer import run_on_devcontainer from tasks.flavor import AgentFlavor from tasks.libs.common.utils import ( @@ -127,7 +127,6 @@ def build( install_path=None, embedded_path=None, rtloader_root=None, - python_home_2=None, python_home_3=None, major_version='7', exclude_rtloader=False, @@ -150,6 +149,7 @@ def build( if flavor.is_ot(): # for agent build purposes the UA agent is just like base flavor = AgentFlavor.base + fips_mode = flavor.is_fips() if not exclude_rtloader and not flavor.is_iot(): # If embedded_path is set, we should give it to rtloader as it should install the 
headers/libs @@ -163,7 +163,6 @@ def build( install_path=install_path, embedded_path=embedded_path, rtloader_root=rtloader_root, - python_home_2=python_home_2, python_home_3=python_home_3, major_version=major_version, ) @@ -193,6 +192,7 @@ def build( exclude_tags = [] if build_exclude is None else build_exclude.split(",") build_tags = get_build_tags(include_tags, exclude_tags) + build_tags = add_fips_tags(build_tags, fips_mode) cmd = "go build -mod={go_mod} {race_opt} {build_type} -tags \"{go_build_tags}\" " diff --git a/tasks/build_tags.py b/tasks/build_tags.py index 593219ef96b56..caa73601e65b6 100644 --- a/tasks/build_tags.py +++ b/tasks/build_tags.py @@ -105,6 +105,8 @@ } ) +FIPS_AGENT_TAGS = AGENT_TAGS.union({"goexperiment.systemcrypto"}) + # CLUSTER_AGENT_TAGS lists the tags needed when building the cluster-agent CLUSTER_AGENT_TAGS = {"clusterchecks", "datadog.no_waf", "kubeapiserver", "orchestrator", "zlib", "zstd", "ec2", "gce"} @@ -235,6 +237,11 @@ "lint": DOGSTATSD_TAGS.union(UNIT_TEST_TAGS).difference(UNIT_TEST_EXCLUDE_TAGS), "unit-tests": DOGSTATSD_TAGS.union(UNIT_TEST_TAGS).difference(UNIT_TEST_EXCLUDE_TAGS), }, + AgentFlavor.fips: { + "agent": FIPS_AGENT_TAGS, + "lint": FIPS_AGENT_TAGS.union(UNIT_TEST_TAGS).difference(UNIT_TEST_EXCLUDE_TAGS), + "unit-tests": FIPS_AGENT_TAGS.union(UNIT_TEST_TAGS).difference(UNIT_TEST_EXCLUDE_TAGS), + }, } @@ -414,3 +421,9 @@ def compute_config_build_tags(targets="all", build_include=None, build_exclude=N build_exclude = [] if build_exclude is None else build_exclude.split(",") use_tags = get_build_tags(build_include, build_exclude) return use_tags + + +def add_fips_tags(tags: list[str], fips_mode: bool) -> list[str]: + if fips_mode: + tags.append("goexperiment.systemcrypto") + return tags diff --git a/tasks/flavor.py b/tasks/flavor.py index 8b38d93b00181..d935211441053 100644 --- a/tasks/flavor.py +++ b/tasks/flavor.py @@ -8,9 +8,13 @@ class AgentFlavor(enum.Enum): heroku = 3 dogstatsd = 4 ot = 5 + fips = 6 def 
is_iot(self): return self == type(self).iot def is_ot(self): return self == type(self).ot + + def is_fips(self): + return self == type(self).fips diff --git a/tasks/github_tasks.py b/tasks/github_tasks.py index abe46fd6ff366..54a4aad6ab53c 100644 --- a/tasks/github_tasks.py +++ b/tasks/github_tasks.py @@ -18,7 +18,7 @@ print_workflow_conclusion, trigger_macos_workflow, ) -from tasks.libs.common.color import color_message +from tasks.libs.common.color import Color, color_message from tasks.libs.common.constants import DEFAULT_INTEGRATIONS_CORE_BRANCH from tasks.libs.common.datadog_api import create_gauge, send_event, send_metrics from tasks.libs.common.git import get_default_branch @@ -546,3 +546,36 @@ def agenttelemetry_list_change_ack_check(_, pr_id=-1): print( "'need-change/agenttelemetry-governance' label found on the PR: potential change to Agent Telemetry metrics is acknowledged and the governance instructions are followed." ) + + +@task +def get_required_checks(_, branch: str = "main"): + """ + For this task to work: + - A Personal Access Token (PAT) needs the "repo" permissions. + - A fine-grained token needs the "Administration" repository permissions (read). + """ + from tasks.libs.ciproviders.github_api import GithubAPI + + gh = GithubAPI() + required_checks = gh.get_branch_required_checks(branch) + print(required_checks) + + +@task(iterable=['check']) +def add_required_checks(_, branch: str, check: str, force: bool = False): + """ + For this task to work: + - A Personal Access Token (PAT) needs the "repo" permissions. + - A fine-grained token needs the "Administration" repository permissions (write). 
+ + Use it like this: + inv github.add-required-checks --branch=main --check="dd-gitlab/lint_codeowners" --check="dd-gitlab/lint_components" + """ + from tasks.libs.ciproviders.github_api import GithubAPI + + if not check: + raise Exit(color_message("No check name provided, exiting", Color.RED), code=1) + + gh = GithubAPI() + gh.add_branch_required_check(branch, check, force) diff --git a/tasks/gotest.py b/tasks/gotest.py index 3ef5ebc9de905..e9403679ea40a 100644 --- a/tasks/gotest.py +++ b/tasks/gotest.py @@ -255,7 +255,6 @@ def test( race=False, profile=False, rtloader_root=None, - python_home_2=None, python_home_3=None, cpus=None, major_version='7', @@ -303,7 +302,6 @@ def test( ldflags, gcflags, env = get_build_flags( ctx, rtloader_root=rtloader_root, - python_home_2=python_home_2, python_home_3=python_home_3, major_version=major_version, ) diff --git a/tasks/installer.py b/tasks/installer.py index 1ed92c807a507..af259b1b47dbe 100644 --- a/tasks/installer.py +++ b/tasks/installer.py @@ -2,6 +2,7 @@ installer namespaced tasks """ +import base64 import os import shutil @@ -75,6 +76,57 @@ def build( ctx.run(cmd, env=env) +@task +def build_linux_script( + ctx, + signing_key_id=None, +): + ''' + Builds the linux script that is used to install the agent on linux. 
+ ''' + amd64_path = os.path.join(BIN_PATH, "bootstrapper-linux-amd64") + arm64_path = os.path.join(BIN_PATH, "bootstrapper-linux-arm64") + ctx.run( + f'inv -e installer.build --bootstrapper --rebuild --no-no-strip-binary --output-bin {amd64_path} --no-cgo', + env={'GOOS': 'linux', 'GOARCH': 'amd64'}, + ) + ctx.run( + f'inv -e installer.build --bootstrapper --rebuild --no-no-strip-binary --output-bin {arm64_path} --no-cgo', + env={'GOOS': 'linux', 'GOARCH': 'arm64'}, + ) + with open(amd64_path, 'rb') as f: + amd64_b64 = base64.encodebytes(f.read()).decode('utf-8') + with open(arm64_path, 'rb') as f: + arm64_b64 = base64.encodebytes(f.read()).decode('utf-8') + + with open('pkg/fleet/installer/setup.sh') as f: + setup_content = f.read() + setup_content = setup_content.replace('INSTALLER_BIN_LINUX_AMD64', amd64_b64) + setup_content = setup_content.replace('INSTALLER_BIN_LINUX_ARM64', arm64_b64) + + commit_sha = ctx.run('git rev-parse HEAD', hide=True).stdout.strip() + setup_content = setup_content.replace('INSTALLER_COMMIT', commit_sha) + + with open(os.path.join(BIN_PATH, 'setup.sh'), 'w') as f: + f.write(setup_content) + + if signing_key_id: + ctx.run( + f'gpg --armor --batch --yes --output {os.path.join(BIN_PATH, "setup.sh.asc")} --clearsign --digest-algo SHA256 --default-key {signing_key_id} {os.path.join(BIN_PATH, "setup.sh")}', + ) + # Add the signed footer to the setup.sh file + with ( + open(os.path.join(BIN_PATH, "setup.sh.asc")) as signed_file, + open(os.path.join(BIN_PATH, 'setup.sh'), 'w') as f, + ): + skip_header = False + for line in signed_file: + if skip_header: + f.write(line) + elif line.strip() == "": # Empty line marks end of header + skip_header = True + + @task def push_artifact( ctx, diff --git a/tasks/libs/ciproviders/github_api.py b/tasks/libs/ciproviders/github_api.py index f8ed33e3a3252..4d1b3e6a6319b 100644 --- a/tasks/libs/ciproviders/github_api.py +++ b/tasks/libs/ciproviders/github_api.py @@ -11,9 +11,10 @@ import requests -from 
tasks.libs.common.color import color_message +from tasks.libs.common.color import Color, color_message from tasks.libs.common.constants import GITHUB_REPO_NAME from tasks.libs.common.git import get_default_branch +from tasks.libs.common.user_interactions import yes_no_question try: import semver @@ -90,6 +91,136 @@ def get_milestone_by_name(self, milestone_name): return milestone return None + def get_branch_protection(self, branch_name: str): + """ + Get the protection of a given branch + """ + branch = self.get_branch(branch_name) + if not branch: + raise Exit(color_message(f"Branch {branch_name} not found", Color.RED), code=1) + elif not branch.protected: + raise Exit(color_message(f"Branch {branch_name} doesn't have protection", Color.RED), code=1) + try: + protection = branch.get_protection() + except GithubException as e: + if e.status == 403: + error_msg = f"""Can't access {branch_name} branch protection, probably due to missing permissions. You need either: + - A Personal Access Token (PAT) needs the "repo" permissions. + - Or a fine-grained token needs the "Administration" repository permissions. +""" + raise PermissionError(error_msg) from e + raise + return protection + + def protection_to_payload(self, protection_raw_data: dict) -> dict: + """ + Convert the protection object to a payload. 
+ See https://docs.github.com/en/rest/branches/branch-protection?apiVersion=2022-11-28#update-branch-protection + + The following seems to be defined at the Org scale, so we're not resending them here: + - required_pull_request_reviews > dismissal_restrictions + - required_pull_request_reviews > bypass_pull_request_allowances + """ + prot = protection_raw_data + return { + "required_status_checks": { + "strict": prot["required_status_checks"]["strict"], + "checks": [ + {"context": check["context"], "app_id": -1 if check["app_id"] is None else check["app_id"]} + for check in prot["required_status_checks"]["checks"] + ], + }, + "enforce_admins": prot["enforce_admins"]["enabled"], + "required_pull_request_reviews": { + "dismiss_stale_reviews": prot["required_pull_request_reviews"]["dismiss_stale_reviews"], + "require_code_owner_reviews": prot["required_pull_request_reviews"]["require_code_owner_reviews"], + "required_approving_review_count": prot["required_pull_request_reviews"][ + "required_approving_review_count" + ], + "require_last_push_approval": prot["required_pull_request_reviews"]["require_last_push_approval"], + }, + "restrictions": { + "users": prot["restrictions"]["users"], + "teams": prot["restrictions"]["teams"], + "apps": [app["slug"] for app in prot["restrictions"]["apps"]], + }, + "required_linear_history": prot["required_linear_history"]["enabled"], + "allow_force_pushes": prot["allow_force_pushes"]["enabled"], + "allow_deletions": prot["allow_deletions"]["enabled"], + "block_creations": prot["block_creations"]["enabled"], + "required_conversation_resolution": prot["required_conversation_resolution"]["enabled"], + "lock_branch": prot["lock_branch"]["enabled"], + "allow_fork_syncing": prot["allow_fork_syncing"]["enabled"], + } + + def get_branch_required_checks(self, branch_name: str) -> list[str]: + """ + Get the required checks for a given branch + """ + return self.get_branch_protection(branch_name).required_status_checks.contexts + + def 
add_branch_required_check(self, branch_name: str, checks: list[str], force: bool = False) -> None: + """ + Add required checks to a given branch + + It uses the Github API directly to add the required checks to the branch. + Using the "checks" argument is not supported by PyGithub. + :calls: `PUT /repos/{owner}/{repo}/branches/{branch}/protection + + """ + current_protection = self.get_branch_protection(branch_name) + current_required_checks = current_protection.required_status_checks.contexts + new_required_checks = [] + for check in checks: + if check in current_required_checks: + print( + color_message( + f"Ignoring the '{check}' check as it is already required on the {branch_name} branch", + Color.ORANGE, + ) + ) + else: + new_required_checks.append(check) + if not new_required_checks: + print(color_message("No new checks to add", Color.GREEN)) + return + print( + color_message( + f"Warning: You are about to add the following checks to the {branch_name} branch:\n{new_required_checks}", + Color.ORANGE, + ) + ) + print(color_message(f"Current required checks: {sorted(current_required_checks)}", Color.GREY)) + if force or yes_no_question("Are you sure?", default=False): + # We're crafting the request and not using PyGithub because it doesn't support passing the checks variable instead of contexts. 
+ protection_url = f"{self.repo.url}/branches/{branch_name}/protection" + headers = { + "Accept": "application/vnd.github+json", + "Authorization": f"Bearer {os.environ['GITHUB_TOKEN']}", + "X-GitHub-Api-Version": "2022-11-28", + } + payload = self.protection_to_payload(current_protection.raw_data) + payload["required_status_checks"]["checks"] = sorted( + payload["required_status_checks"]["checks"] + + [{"context": check, "app_id": -1} for check in new_required_checks], + key=lambda x: x['context'], + ) + + response = requests.put(protection_url, headers=headers, json=payload, timeout=10) + if response.status_code != 200: + print( + color_message( + f"Error while sending the PUT request to {protection_url}\n{response.text}", Color.RED + ) + ) + raise Exit( + color_message(f"Failed to update the required checks for the {branch_name} branch", Color.RED), + code=1, + ) + print(color_message(f"The {checks} checks were successfully added!", Color.GREEN)) + else: + print(color_message("Aborting changes to the branch required checks", Color.GREEN)) + def is_release_note_needed(self, pull_number): """ Check if labels are ok for skipping QA diff --git a/tasks/libs/common/utils.py b/tasks/libs/common/utils.py index 44758ae35b543..c6a024b60c0ae 100644 --- a/tasks/libs/common/utils.py +++ b/tasks/libs/common/utils.py @@ -203,7 +203,6 @@ def get_build_flags( run_path=None, embedded_path=None, rtloader_root=None, - python_home_2=None, python_home_3=None, major_version='7', headless_mode=False, @@ -243,8 +242,6 @@ def get_build_flags( ldflags += f"-X {REPO_PATH}/pkg/config/setup.defaultRunPath={run_path} " # setting python homes in the code - if python_home_2: - ldflags += f"-X {REPO_PATH}/pkg/collector/python.pythonHome2={python_home_2} " if python_home_3: ldflags += f"-X {REPO_PATH}/pkg/collector/python.pythonHome3={python_home_3} " diff --git a/tasks/omnibus.py b/tasks/omnibus.py index 10cbc9db9b899..649992fdcfb44 100644 --- a/tasks/omnibus.py +++ b/tasks/omnibus.py @@ -91,6 
+91,7 @@ def get_omnibus_env( flavor=AgentFlavor.base, pip_config_file="pip.conf", custom_config_dir=None, + fips_mode=False, ): env = load_release_versions(ctx, release_version) @@ -133,6 +134,9 @@ def get_omnibus_env( if custom_config_dir: env["OUTPUT_CONFIG_DIR"] = custom_config_dir + if fips_mode: + env['FIPS_MODE'] = 'true' + # We need to override the workers variable in omnibus build when running on Kubernetes runners, # otherwise, ohai detect the number of CPU on the host and run the make jobs with all the CPU. kubernetes_cpu_request = os.environ.get('KUBERNETES_CPU_REQUEST') @@ -187,6 +191,7 @@ def build( """ flavor = AgentFlavor[flavor] + fips_mode = flavor.is_fips() durations = {} if not skip_deps: with timed(quiet=True) as durations['Deps']: @@ -211,6 +216,7 @@ def build( flavor=flavor, pip_config_file=pip_config_file, custom_config_dir=config_directory, + fips_mode=fips_mode, ) if not target_project: diff --git a/tasks/process_agent.py b/tasks/process_agent.py index 6c3481d9c5213..d1020f3e88899 100644 --- a/tasks/process_agent.py +++ b/tasks/process_agent.py @@ -6,7 +6,7 @@ from invoke import task from invoke.exceptions import Exit -from tasks.build_tags import filter_incompatible_tags, get_build_tags, get_default_build_tags +from tasks.build_tags import add_fips_tags, filter_incompatible_tags, get_build_tags, get_default_build_tags from tasks.flavor import AgentFlavor from tasks.libs.common.utils import REPO_PATH, bin_name, get_build_flags from tasks.system_probe import copy_ebpf_and_related_files @@ -35,6 +35,7 @@ def build( flavor = AgentFlavor[flavor] if flavor.is_ot(): flavor = AgentFlavor.base + fips_mode = flavor.is_fips() ldflags, gcflags, env = get_build_flags( ctx, @@ -67,6 +68,7 @@ def build( build_exclude = [] if build_exclude is None else build_exclude.split(",") build_tags = get_build_tags(build_include, build_exclude) + build_tags = add_fips_tags(build_tags, fips_mode) if os.path.exists(BIN_PATH): os.remove(BIN_PATH) diff --git 
a/tasks/release.py b/tasks/release.py index 5e0b11483a938..394f239f7f879 100644 --- a/tasks/release.py +++ b/tasks/release.py @@ -132,8 +132,9 @@ def __get_force_option(force: bool) -> str: return force_option -def __tag_single_module(ctx, module, agent_version, commit, push, force_option, devel): +def __tag_single_module(ctx, module, agent_version, commit, force_option, devel): """Tag a given module.""" + tags = [] for tag in module.tag(agent_version): if devel: tag += "-devel" @@ -146,9 +147,8 @@ def __tag_single_module(ctx, module, agent_version, commit, push, force_option, message = f"Could not create tag {tag}. Please rerun the task to retry creating the tags (you may need the --force option)" raise Exit(color_message(message, "red"), code=1) print(f"Created tag {tag}") - if push: - ctx.run(f"git push origin {tag}{force_option}") - print(f"Pushed tag {tag}") + tags.append(tag) + return tags @task @@ -173,11 +173,17 @@ def tag_modules(ctx, agent_version, commit="HEAD", verify=True, push=True, force check_version(agent_version) force_option = __get_force_option(force) + tags = [] for module in get_default_modules().values(): # Skip main module; this is tagged at tag_version via __tag_single_module. 
if module.should_tag and module.path != ".": - __tag_single_module(ctx, module, agent_version, commit, push, force_option, devel) + new_tags = __tag_single_module(ctx, module, agent_version, commit, force_option, devel) + tags.extend(new_tags) + if push: + tags_list = ' '.join(tags) + ctx.run(f"git push origin {tags_list}{force_option}") + print(f"Pushed tag {tags_list}") print(f"Created module tags for version {agent_version}") @@ -203,7 +209,11 @@ def tag_version(ctx, agent_version, commit="HEAD", verify=True, push=True, force # Always tag the main module force_option = __get_force_option(force) - __tag_single_module(ctx, get_default_modules()["."], agent_version, commit, push, force_option, devel) + tags = __tag_single_module(ctx, get_default_modules()["."], agent_version, commit, force_option, devel) + if push: + tags_list = ' '.join(tags) + ctx.run(f"git push origin {tags_list}{force_option}") + print(f"Pushed tag {tags_list}") print(f"Created tags for version {agent_version}") diff --git a/tasks/security_agent.py b/tasks/security_agent.py index 3d0dc7e752d95..f591473c2217e 100644 --- a/tasks/security_agent.py +++ b/tasks/security_agent.py @@ -14,7 +14,7 @@ from invoke.tasks import task from tasks.agent import generate_config -from tasks.build_tags import get_default_build_tags +from tasks.build_tags import add_fips_tags, get_default_build_tags from tasks.go import run_golangci_lint from tasks.libs.build.ninja import NinjaWriter from tasks.libs.common.git import get_commit_sha, get_current_branch @@ -58,6 +58,7 @@ def build( go_mod="mod", skip_assets=False, static=False, + fips_mode=False, ): """ Build the security agent @@ -88,6 +89,7 @@ def build( ldflags += ' '.join([f"-X '{main + key}={value}'" for key, value in ld_vars.items()]) build_tags += get_default_build_tags(build="security-agent") + build_tags = add_fips_tags(build_tags, fips_mode) if os.path.exists(BIN_PATH): os.remove(BIN_PATH) diff --git a/tasks/system_probe.py b/tasks/system_probe.py index 
2517cc71a2edd..5ddffee1496c8 100644 --- a/tasks/system_probe.py +++ b/tasks/system_probe.py @@ -19,7 +19,7 @@ from invoke.exceptions import Exit from invoke.tasks import task -from tasks.build_tags import UNIT_TEST_TAGS, get_default_build_tags +from tasks.build_tags import UNIT_TEST_TAGS, add_fips_tags, get_default_build_tags from tasks.libs.build.ninja import NinjaWriter from tasks.libs.common.color import color_message from tasks.libs.common.git import get_commit_sha @@ -735,6 +735,7 @@ def build_sysprobe_binary( install_path=None, bundle_ebpf=False, strip_binary=False, + fips_mode=False, static=False, ) -> None: arch_obj = Arch.from_str(arch) @@ -748,6 +749,7 @@ def build_sysprobe_binary( ) build_tags = get_default_build_tags(build="system-probe") + build_tags = add_fips_tags(build_tags, fips_mode) if bundle_ebpf: build_tags.append(BUNDLE_TAG) if strip_binary: diff --git a/tasks/trace_agent.py b/tasks/trace_agent.py index 812116eeb3c32..a9503e77c3453 100644 --- a/tasks/trace_agent.py +++ b/tasks/trace_agent.py @@ -3,7 +3,7 @@ from invoke import Exit, task -from tasks.build_tags import filter_incompatible_tags, get_build_tags, get_default_build_tags +from tasks.build_tags import add_fips_tags, filter_incompatible_tags, get_build_tags, get_default_build_tags from tasks.flavor import AgentFlavor from tasks.libs.common.utils import REPO_PATH, bin_name, get_build_flags from tasks.windows_resources import build_messagetable, build_rc, versioninfo_vars @@ -30,6 +30,7 @@ def build( flavor = AgentFlavor[flavor] if flavor.is_ot(): flavor = AgentFlavor.base + fips_mode = flavor.is_fips() ldflags, gcflags, env = get_build_flags( ctx, @@ -59,6 +60,7 @@ def build( build_exclude = [] if build_exclude is None else build_exclude.split(",") build_tags = get_build_tags(build_include, build_exclude) + build_tags = add_fips_tags(build_tags, fips_mode) race_opt = "-race" if race else "" build_type = "-a" if rebuild else "" diff --git a/tasks/winbuildscripts/unittests.ps1 
b/tasks/winbuildscripts/unittests.ps1 index 5464db3dc228c..1b0f5e18ed438 100644 --- a/tasks/winbuildscripts/unittests.ps1 +++ b/tasks/winbuildscripts/unittests.ps1 @@ -54,7 +54,7 @@ if($err -ne 0){ Write-Host -ForegroundColor Red "Agent build failed $err" [Environment]::Exit($err) } -& inv -e test --junit-tar="$Env:JUNIT_TAR" --race --profile --rerun-fails=2 --coverage --cpus 8 --python-home-2=$Env:Python2_ROOT_DIR --python-home-3=$Env:Python3_ROOT_DIR --save-result-json C:\mnt\$test_output_file $Env:EXTRA_OPTS --build-stdlib $TEST_WASHER_FLAG +& inv -e test --junit-tar="$Env:JUNIT_TAR" --race --profile --rerun-fails=2 --coverage --cpus 8 --python-home-3=$Env:Python3_ROOT_DIR --save-result-json C:\mnt\$test_output_file $Env:EXTRA_OPTS --build-stdlib $TEST_WASHER_FLAG If ($LASTEXITCODE -ne "0") { exit $LASTEXITCODE } diff --git a/test/new-e2e/go.mod b/test/new-e2e/go.mod index 4c388ea79fb24..ad07f9d5683ec 100644 --- a/test/new-e2e/go.mod +++ b/test/new-e2e/go.mod @@ -60,7 +60,7 @@ require ( // `TEST_INFRA_DEFINITIONS_BUILDIMAGES` matches the commit sha in the module version // Example: github.com/DataDog/test-infra-definitions v0.0.0-YYYYMMDDHHmmSS-0123456789AB // => TEST_INFRA_DEFINITIONS_BUILDIMAGES: 0123456789AB - github.com/DataDog/test-infra-definitions v0.0.0-20241115164330-7cd5e8a62570 + github.com/DataDog/test-infra-definitions v0.0.0-20241127134930-047dd64128b6 github.com/aws/aws-sdk-go-v2 v1.32.2 github.com/aws/aws-sdk-go-v2/config v1.27.40 github.com/aws/aws-sdk-go-v2/service/ec2 v1.164.2 @@ -78,7 +78,7 @@ require ( github.com/pulumi/pulumi-awsx/sdk/v2 v2.16.1 github.com/pulumi/pulumi-eks/sdk/v2 v2.7.8 // indirect github.com/pulumi/pulumi-kubernetes/sdk/v4 v4.17.1 - github.com/pulumi/pulumi/sdk/v3 v3.137.0 + github.com/pulumi/pulumi/sdk/v3 v3.140.0 github.com/samber/lo v1.47.0 github.com/stretchr/testify v1.9.0 github.com/xeipuuv/gojsonschema v1.2.0 @@ -303,8 +303,9 @@ require ( github.com/pulumi/pulumi-azure-native-sdk/authorization/v2 v2.67.0 // 
indirect github.com/pulumi/pulumi-azure-native-sdk/compute/v2 v2.56.0 // indirect github.com/pulumi/pulumi-azure-native-sdk/containerservice/v2 v2.67.0 // indirect + github.com/pulumi/pulumi-azure-native-sdk/managedidentity/v2 v2.73.1 // indirect github.com/pulumi/pulumi-azure-native-sdk/network/v2 v2.67.0 // indirect - github.com/pulumi/pulumi-azure-native-sdk/v2 v2.67.0 // indirect + github.com/pulumi/pulumi-azure-native-sdk/v2 v2.73.1 // indirect github.com/pulumi/pulumi-gcp/sdk/v6 v6.67.1 // indirect github.com/pulumi/pulumi-gcp/sdk/v7 v7.38.0 // indirect github.com/twinj/uuid v0.0.0-20151029044442-89173bcdda19 // indirect diff --git a/test/new-e2e/go.sum b/test/new-e2e/go.sum index fcf50dc3486ce..e1bf799b32880 100644 --- a/test/new-e2e/go.sum +++ b/test/new-e2e/go.sum @@ -16,8 +16,8 @@ github.com/DataDog/datadog-go/v5 v5.5.0 h1:G5KHeB8pWBNXT4Jtw0zAkhdxEAWSpWH00geHI github.com/DataDog/datadog-go/v5 v5.5.0/go.mod h1:K9kcYBlxkcPP8tvvjZZKs/m1edNAUFzBbdpTUKfCsuw= github.com/DataDog/mmh3 v0.0.0-20200805151601-30884ca2197a h1:m9REhmyaWD5YJ0P53ygRHxKKo+KM+nw+zz0hEdKztMo= github.com/DataDog/mmh3 v0.0.0-20200805151601-30884ca2197a/go.mod h1:SvsjzyJlSg0rKsqYgdcFxeEVflx3ZNAyFfkUHP0TxXg= -github.com/DataDog/test-infra-definitions v0.0.0-20241115164330-7cd5e8a62570 h1:vVkrzQIPIhgxZP+GMd+9UhILnZTj1Uf4wZlxhcDGysA= -github.com/DataDog/test-infra-definitions v0.0.0-20241115164330-7cd5e8a62570/go.mod h1:l0n0FQYdWWQxbI5a2EkuynRQIteUQcYOaOhdxD9TvJs= +github.com/DataDog/test-infra-definitions v0.0.0-20241127134930-047dd64128b6 h1:7Cy8Iju8X6XdwqXyTrkke1ULq/yEikXVEwAgg4yCGFg= +github.com/DataDog/test-infra-definitions v0.0.0-20241127134930-047dd64128b6/go.mod h1:YYNx5mySRiinvCoTQIkToR8PcBXpxrRIW/HqmTw9XAY= github.com/DataDog/zstd v1.5.5 h1:oWf5W7GtOLgp6bciQYDmhHHjdhYkALu6S/5Ni9ZgSvQ= github.com/DataDog/zstd v1.5.5/go.mod h1:g4AWEaM3yOg3HYfnJ3YIawPnVdXJh9QME85blwSAmyw= github.com/DataDog/zstd_0 v0.0.0-20210310093942-586c1286621f h1:5Vuo4niPKFkfwW55jV4vY0ih3VQ9RaQqeqY67fvRn8A= @@ 
-415,10 +415,12 @@ github.com/pulumi/pulumi-azure-native-sdk/compute/v2 v2.56.0 h1:MFOd6X9FPlixzriy github.com/pulumi/pulumi-azure-native-sdk/compute/v2 v2.56.0/go.mod h1:453Ff5wNscroYfq+zxME7Nbt7HdZv+dh0zLZwLyGBws= github.com/pulumi/pulumi-azure-native-sdk/containerservice/v2 v2.67.0 h1:jvruQQSO1ESk7APFQ3mAge7C9SWKU9nbBHrilcyeSGU= github.com/pulumi/pulumi-azure-native-sdk/containerservice/v2 v2.67.0/go.mod h1:d5nmekK1mrjM9Xo/JGGVlAs7mqqftBo3DmKji+1zbmw= +github.com/pulumi/pulumi-azure-native-sdk/managedidentity/v2 v2.73.1 h1:rkNZDAik+qlIhbmFoa09ln/oJMXey5+olw8ShmljgXc= +github.com/pulumi/pulumi-azure-native-sdk/managedidentity/v2 v2.73.1/go.mod h1:P/N/xG2lVxsHdspmKjH+d8d4ln+2arXBmOl3zhjWnnw= github.com/pulumi/pulumi-azure-native-sdk/network/v2 v2.67.0 h1:r26Xl6FdOJnbLs1ny9ekuRjFxAocZK8jS8SLrgXKEFE= github.com/pulumi/pulumi-azure-native-sdk/network/v2 v2.67.0/go.mod h1:8yXZtmHe2Zet5pb8gZ7D730d0VAm4kYUdwCj7sjhz6g= -github.com/pulumi/pulumi-azure-native-sdk/v2 v2.67.0 h1:FgfXLypiQ/DKWRPQpyNaftXcGl5HVgA93msBZTQ6Ddk= -github.com/pulumi/pulumi-azure-native-sdk/v2 v2.67.0/go.mod h1:0y4wJUCX1eA3ZSn0jJIRXtHeJA7qgbPfkrR9qvj+5D4= +github.com/pulumi/pulumi-azure-native-sdk/v2 v2.73.1 h1:yzXxwwq3tHdtSOi5vjKmKXq7HyKvDaKulF53MFTMbh8= +github.com/pulumi/pulumi-azure-native-sdk/v2 v2.73.1/go.mod h1:ChjIUNDNeN6jI33ZOivHUFqM6purDiLP01mghMGe1Fs= github.com/pulumi/pulumi-command/sdk v1.0.1 h1:ZuBSFT57nxg/fs8yBymUhKLkjJ6qmyN3gNvlY/idiN0= github.com/pulumi/pulumi-command/sdk v1.0.1/go.mod h1:C7sfdFbUIoXKoIASfXUbP/U9xnwPfxvz8dBpFodohlA= github.com/pulumi/pulumi-docker/sdk/v4 v4.5.5 h1:7OjAfgLz5PAy95ynbgPAlWls5WBe4I/QW/61TdPWRlQ= @@ -437,8 +439,8 @@ github.com/pulumi/pulumi-random/sdk/v4 v4.16.6 h1:M9BSF13bQxj74C61nBTVITrsgT6oRR github.com/pulumi/pulumi-random/sdk/v4 v4.16.6/go.mod h1:l5ew7S/G1GspPLH9KeWXqxQ4ZmS2hh2sEMv3bW9M3yc= github.com/pulumi/pulumi-tls/sdk/v4 v4.11.1 h1:tXemWrzeVTqG8zq6hBdv1TdPFXjgZ+dob63a/6GlF1o= github.com/pulumi/pulumi-tls/sdk/v4 v4.11.1/go.mod 
h1:hODo3iEmmXDFOXqPK+V+vwI0a3Ww7BLjs5Tgamp86Ng= -github.com/pulumi/pulumi/sdk/v3 v3.137.0 h1:bxhYpOY7Z4xt+VmezEpHuhjpOekkaMqOjzxFg/1OhCw= -github.com/pulumi/pulumi/sdk/v3 v3.137.0/go.mod h1:PvKsX88co8XuwuPdzolMvew5lZV+4JmZfkeSjj7A6dI= +github.com/pulumi/pulumi/sdk/v3 v3.140.0 h1:+Z/RBvdYg7tBNkBwk4p/FzlV7niBT3TbLAICq/Y0LDU= +github.com/pulumi/pulumi/sdk/v3 v3.140.0/go.mod h1:PvKsX88co8XuwuPdzolMvew5lZV+4JmZfkeSjj7A6dI= github.com/pulumiverse/pulumi-time/sdk v0.1.0 h1:xfi9HKDgV+GgDxQ23oSv9KxC3DQqViGTcMrJICRgJv0= github.com/pulumiverse/pulumi-time/sdk v0.1.0/go.mod h1:NUa1zA74DF002WrM6iF111A6UjX9knPpXufVRvBwNyg= github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE= diff --git a/test/new-e2e/pkg/environments/aws/kubernetes/eks.go b/test/new-e2e/pkg/environments/aws/kubernetes/eks.go index 0b75531ec4f73..22971ae20f9ed 100644 --- a/test/new-e2e/pkg/environments/aws/kubernetes/eks.go +++ b/test/new-e2e/pkg/environments/aws/kubernetes/eks.go @@ -142,9 +142,11 @@ func EKSRunFunc(ctx *pulumi.Context, env *environments.Kubernetes, params *Provi return err } - // dogstatsd clients that report to the dogstatsd standalone deployment - if _, err := dogstatsd.K8sAppDefinition(&awsEnv, cluster.KubeProvider, "workload-dogstatsd-standalone", dogstatsdstandalone.HostPort, dogstatsdstandalone.Socket, utils.PulumiDependsOn(cluster)); err != nil { - return err + if params.deployDogstatsd { + // dogstatsd clients that report to the dogstatsd standalone deployment + if _, err := dogstatsd.K8sAppDefinition(&awsEnv, cluster.KubeProvider, "workload-dogstatsd-standalone", dogstatsdstandalone.HostPort, dogstatsdstandalone.Socket, utils.PulumiDependsOn(cluster)); err != nil { + return err + } } if _, err := tracegen.K8sAppDefinition(&awsEnv, cluster.KubeProvider, "workload-tracegen", utils.PulumiDependsOn(cluster)); err != nil { diff --git a/test/new-e2e/pkg/environments/aws/kubernetes/kind.go 
b/test/new-e2e/pkg/environments/aws/kubernetes/kind.go index cdf3efa123931..60da0620167f9 100644 --- a/test/new-e2e/pkg/environments/aws/kubernetes/kind.go +++ b/test/new-e2e/pkg/environments/aws/kubernetes/kind.go @@ -167,9 +167,11 @@ agents: return err } - // dogstatsd clients that report to the dogstatsd standalone deployment - if _, err := dogstatsd.K8sAppDefinition(&awsEnv, kubeProvider, "workload-dogstatsd-standalone", dogstatsdstandalone.HostPort, dogstatsdstandalone.Socket); err != nil { - return err + if params.deployDogstatsd { + // dogstatsd clients that report to the dogstatsd standalone deployment + if _, err := dogstatsd.K8sAppDefinition(&awsEnv, kubeProvider, "workload-dogstatsd-standalone", dogstatsdstandalone.HostPort, dogstatsdstandalone.Socket); err != nil { + return err + } } if _, err := tracegen.K8sAppDefinition(&awsEnv, kubeProvider, "workload-tracegen"); err != nil { diff --git a/test/new-e2e/tests/agent-platform/common/agent_integration.go b/test/new-e2e/tests/agent-platform/common/agent_integration.go index 4016dfb33ff0a..dda703b8d7f90 100644 --- a/test/new-e2e/tests/agent-platform/common/agent_integration.go +++ b/test/new-e2e/tests/agent-platform/common/agent_integration.go @@ -23,48 +23,48 @@ func CheckIntegrationInstall(t *testing.T, client *TestClient) { freezeContent, err := client.FileManager.ReadFile(requirementIntegrationPath) require.NoError(t, err) - freezeContent = ciliumRegex.ReplaceAll(freezeContent, []byte("datadog-cilium==2.2.1")) + freezeContent = ciliumRegex.ReplaceAll(freezeContent, []byte("datadog-cilium==4.0.0")) _, err = client.FileManager.WriteFile(requirementIntegrationPath, freezeContent) require.NoError(t, err) t.Run("install-uninstall package", func(tt *testing.T) { - installIntegration(tt, client, "datadog-cilium==2.2.1") + installIntegration(tt, client, "datadog-cilium==4.0.0") freezeRequirement := client.AgentClient.Integration(agentclient.WithArgs([]string{"freeze"})) - require.Contains(tt, freezeRequirement, 
"datadog-cilium==2.2.1", "before removal integration should be in freeze") + require.Contains(tt, freezeRequirement, "datadog-cilium==4.0.0", "before removal integration should be in freeze") client.AgentClient.Integration(agentclient.WithArgs([]string{"remove", "-r", "datadog-cilium"})) freezeRequirementNew := client.AgentClient.Integration(agentclient.WithArgs([]string{"freeze"})) - require.NotContains(tt, freezeRequirementNew, "datadog-cilium==2.2.1", "after removal integration should not be in freeze") + require.NotContains(tt, freezeRequirementNew, "datadog-cilium==4.0.0", "after removal integration should not be in freeze") }) t.Run("upgrade a package", func(tt *testing.T) { - installIntegration(tt, client, "datadog-cilium==2.2.1") + installIntegration(tt, client, "datadog-cilium==4.0.0") freezeRequirement := client.AgentClient.Integration(agentclient.WithArgs([]string{"freeze"})) - require.NotContains(tt, freezeRequirement, "datadog-cilium==2.3.0", "before update integration should not be in 2.3.0") + require.NotContains(tt, freezeRequirement, "datadog-cilium==5.0.0", "before update integration should not be in 5.0.0") - installIntegration(tt, client, "datadog-cilium==2.3.0") + installIntegration(tt, client, "datadog-cilium==5.0.0") freezeRequirementNew := client.AgentClient.Integration(agentclient.WithArgs([]string{"freeze"})) - require.Contains(tt, freezeRequirementNew, "datadog-cilium==2.3.0", "after update integration should be in 2.3.0") + require.Contains(tt, freezeRequirementNew, "datadog-cilium==5.0.0", "after update integration should be in 5.0.0") }) t.Run("downgrade a package", func(tt *testing.T) { - installIntegration(tt, client, "datadog-cilium==2.3.0") + installIntegration(tt, client, "datadog-cilium==5.0.0") freezeRequirement := client.AgentClient.Integration(agentclient.WithArgs([]string{"freeze"})) - require.NotContains(tt, freezeRequirement, "datadog-cilium==2.2.1", "before downgrade integration should not be in 2.2.1") + 
require.NotContains(tt, freezeRequirement, "datadog-cilium==4.0.0", "before downgrade integration should not be in 4.0.0") - installIntegration(tt, client, "datadog-cilium==2.2.1") + installIntegration(tt, client, "datadog-cilium==4.0.0") freezeRequirementNew := client.AgentClient.Integration(agentclient.WithArgs([]string{"freeze"})) - require.Contains(tt, freezeRequirementNew, "datadog-cilium==2.2.1", "after downgrade integration should be in 2.2.1") + require.Contains(tt, freezeRequirementNew, "datadog-cilium==4.0.0", "after downgrade integration should be in 4.0.0") }) t.Run("downgrade to older version than shipped", func(tt *testing.T) { - _, err := client.AgentClient.IntegrationWithError(agentclient.WithArgs([]string{"install", "-r", "datadog-cilium==2.2.0"})) + _, err := client.AgentClient.IntegrationWithError(agentclient.WithArgs([]string{"install", "-r", "datadog-cilium==3.6.0"})) require.Error(tt, err, "should raise error when trying to install version older than the one shipped") }) } @@ -80,7 +80,7 @@ func installIntegration(t *testing.T, client *TestClient, integration string) { maxRetries := 6 err := backoff.Retry(func() error { - _, err := client.AgentClient.IntegrationWithError(agentclient.WithArgs([]string{"install", "-r", integration})) + _, err := client.AgentClient.IntegrationWithError(agentclient.WithArgs([]string{"install", "--unsafe-disable-verification", "-r", integration})) return err }, backoff.WithMaxRetries(backoff.NewConstantBackOff(interval), uint64(maxRetries))) diff --git a/test/new-e2e/tests/installer/host/host.go b/test/new-e2e/tests/installer/host/host.go index 369c50f0bf5db..c98684ab498fa 100644 --- a/test/new-e2e/tests/installer/host/host.go +++ b/test/new-e2e/tests/installer/host/host.go @@ -143,7 +143,7 @@ func (h *Host) DeletePath(path string) { // WaitForUnitActive waits for a systemd unit to be active func (h *Host) WaitForUnitActive(units ...string) { for _, unit := range units { - _, err := 
h.remote.Execute(fmt.Sprintf("timeout=30; unit=%s; while ! systemctl is-active --quiet $unit && [ $timeout -gt 0 ]; do sleep 1; ((timeout--)); done; [ $timeout -ne 0 ]", unit)) + _, err := h.remote.Execute(fmt.Sprintf("timeout=60; unit=%s; while ! systemctl is-active --quiet $unit && [ $timeout -gt 0 ]; do sleep 1; ((timeout--)); done; [ $timeout -ne 0 ]", unit)) require.NoError(h.t, err, "unit %s did not become active. logs: %s", unit, h.remote.MustExecute("sudo journalctl -xeu "+unit)) } } @@ -151,7 +151,7 @@ func (h *Host) WaitForUnitActive(units ...string) { // WaitForUnitActivating waits for a systemd unit to be activating func (h *Host) WaitForUnitActivating(units ...string) { for _, unit := range units { - _, err := h.remote.Execute(fmt.Sprintf("timeout=30; unit=%s; while ! grep -q \"Active: activating\" <(sudo systemctl status $unit) && [ $timeout -gt 0 ]; do sleep 1; ((timeout--)); done; [ $timeout -ne 0 ]", unit)) + _, err := h.remote.Execute(fmt.Sprintf("timeout=60; unit=%s; while ! grep -q \"Active: activating\" <(sudo systemctl status $unit) && [ $timeout -gt 0 ]; do sleep 1; ((timeout--)); done; [ $timeout -ne 0 ]", unit)) require.NoError(h.t, err, "unit %s did not become activating. logs: %s", unit, h.remote.MustExecute("sudo journalctl -xeu "+unit)) } diff --git a/test/new-e2e/tests/installer/unix/all_packages_test.go b/test/new-e2e/tests/installer/unix/all_packages_test.go index 075668a50a3b4..f213a0fda8394 100644 --- a/test/new-e2e/tests/installer/unix/all_packages_test.go +++ b/test/new-e2e/tests/installer/unix/all_packages_test.go @@ -12,6 +12,7 @@ import ( "regexp" "strings" "testing" + "time" e2eos "github.com/DataDog/test-infra-definitions/components/os" "github.com/DataDog/test-infra-definitions/scenarios/aws/ec2" @@ -198,9 +199,18 @@ func (s *packageBaseSuite) RunInstallScript(params ...string) { require.NoErrorf(s.T(), err, "installer not properly installed. 
logs: \n%s\n%s", s.Env().RemoteHost.MustExecute("cat /tmp/datadog-installer-stdout.log"), s.Env().RemoteHost.MustExecute("cat /tmp/datadog-installer-stderr.log")) case InstallMethodAnsible: // Install ansible then install the agent - ansiblePrefix := s.installAnsible(s.os) - - s.Env().RemoteHost.MustExecute(fmt.Sprintf("%sansible-galaxy collection install -vvv datadog.dd", ansiblePrefix)) + var ansiblePrefix string + for i := 0; i < 3; i++ { + ansiblePrefix = s.installAnsible(s.os) + _, err := s.Env().RemoteHost.Execute(fmt.Sprintf("%sansible-galaxy collection install -vvv datadog.dd", ansiblePrefix)) + if err == nil { + break + } + if i == 2 { + s.T().Fatal("failed to install ansible-galaxy collection after 3 attempts") + } + time.Sleep(time.Second) + } // Write the playbook env := InstallScriptEnv(s.arch) diff --git a/test/new-e2e/tests/installer/unix/package_installer_test.go b/test/new-e2e/tests/installer/unix/package_installer_test.go index e5a94ece0b18e..8588ee018d210 100644 --- a/test/new-e2e/tests/installer/unix/package_installer_test.go +++ b/test/new-e2e/tests/installer/unix/package_installer_test.go @@ -40,7 +40,7 @@ func (s *packageInstallerSuite) TestInstall() { state.AssertDirExists("/opt/datadog-packages/run/locks", 0777, "root", "root") state.AssertDirExists("/opt/datadog-installer", 0755, "root", "root") - state.AssertDirExists("/opt/datadog-installer/tmp", 0755, "dd-agent", "dd-agent") + state.AssertDirExists("/opt/datadog-packages/tmp", 0755, "dd-agent", "dd-agent") state.AssertDirExists("/opt/datadog-packages", 0755, "root", "root") state.AssertDirExists("/opt/datadog-packages/datadog-installer", 0755, "root", "root") diff --git a/test/new-e2e/tests/installer/unix/upgrade_scenario_test.go b/test/new-e2e/tests/installer/unix/upgrade_scenario_test.go index 537dc408201e2..2ed3db8edffca 100644 --- a/test/new-e2e/tests/installer/unix/upgrade_scenario_test.go +++ b/test/new-e2e/tests/installer/unix/upgrade_scenario_test.go @@ -403,7 +403,7 @@ func (s 
*upgradeScenarioSuite) TestConfigUpgradeSuccessful() { s.host.WaitForFileExists(true, "/opt/datadog-packages/run/installer.sock") state := s.host.State() - state.AssertSymlinkExists("/etc/datadog-packages/datadog-agent/stable", "/etc/datadog-packages/datadog-agent/e94406c45ae766b7d34d2793e4759b9c4d15ed5d5e2b7f73ce1bf0e6836f728d", "root", "root") + state.AssertSymlinkExists("/etc/datadog-agent/managed/datadog-agent/stable", "/etc/datadog-agent/managed/datadog-agent/e94406c45ae766b7d34d2793e4759b9c4d15ed5d5e2b7f73ce1bf0e6836f728d", "root", "root") localCDN.UpdateLayer("config", "\"log_level\": \"error\"") s.executeConfigGoldenPath(localCDN.DirPath, "c78c5e96820c89c6cbc178ddba4ce20a167138a3a580ed4637369a9c5ed804c3") @@ -437,7 +437,7 @@ func (s *upgradeScenarioSuite) TestConfigUpgradeNewAgents() { ) state := s.host.State() - state.AssertSymlinkExists("/etc/datadog-packages/datadog-agent/stable", "/etc/datadog-packages/datadog-agent/e94406c45ae766b7d34d2793e4759b9c4d15ed5d5e2b7f73ce1bf0e6836f728d", "root", "root") + state.AssertSymlinkExists("/etc/datadog-agent/managed/datadog-agent/stable", "/etc/datadog-agent/managed/datadog-agent/e94406c45ae766b7d34d2793e4759b9c4d15ed5d5e2b7f73ce1bf0e6836f728d", "root", "root") // Enables security agent & sysprobe localCDN.AddLayer("config", ` @@ -511,8 +511,8 @@ func (s *upgradeScenarioSuite) TestConfigUpgradeNewAgents() { ) state = s.host.State() - state.AssertSymlinkExists("/etc/datadog-packages/datadog-agent/stable", fmt.Sprintf("/etc/datadog-packages/datadog-agent/%s", hash), "root", "root") - state.AssertSymlinkExists("/etc/datadog-packages/datadog-agent/experiment", fmt.Sprintf("/etc/datadog-packages/datadog-agent/%s", hash), "root", "root") + state.AssertSymlinkExists("/etc/datadog-agent/managed/datadog-agent/stable", fmt.Sprintf("/etc/datadog-agent/managed/datadog-agent/%s", hash), "root", "root") + state.AssertSymlinkExists("/etc/datadog-agent/managed/datadog-agent/experiment", 
fmt.Sprintf("/etc/datadog-agent/managed/datadog-agent/%s", hash), "root", "root") } func (s *upgradeScenarioSuite) TestUpgradeConfigFromExistingExperiment() { @@ -831,7 +831,7 @@ func (s *upgradeScenarioSuite) assertSuccessfulConfigStartExperiment(timestamp h ) state := s.host.State() - state.AssertSymlinkExists("/etc/datadog-packages/datadog-agent/experiment", fmt.Sprintf("/etc/datadog-packages/datadog-agent/%s", hash), "root", "root") + state.AssertSymlinkExists("/etc/datadog-agent/managed/datadog-agent/experiment", fmt.Sprintf("/etc/datadog-agent/managed/datadog-agent/%s", hash), "root", "root") } func (s *upgradeScenarioSuite) assertSuccessfulConfigPromoteExperiment(timestamp host.JournaldTimestamp, hash string) { @@ -852,8 +852,8 @@ func (s *upgradeScenarioSuite) assertSuccessfulConfigPromoteExperiment(timestamp ) state := s.host.State() - state.AssertSymlinkExists("/etc/datadog-packages/datadog-agent/stable", fmt.Sprintf("/etc/datadog-packages/datadog-agent/%s", hash), "root", "root") - state.AssertSymlinkExists("/etc/datadog-packages/datadog-agent/experiment", fmt.Sprintf("/etc/datadog-packages/datadog-agent/%s", hash), "root", "root") + state.AssertSymlinkExists("/etc/datadog-agent/managed/datadog-agent/stable", fmt.Sprintf("/etc/datadog-agent/managed/datadog-agent/%s", hash), "root", "root") + state.AssertSymlinkExists("/etc/datadog-agent/managed/datadog-agent/experiment", fmt.Sprintf("/etc/datadog-agent/managed/datadog-agent/%s", hash), "root", "root") } func (s *upgradeScenarioSuite) assertSuccessfulConfigStopExperiment(timestamp host.JournaldTimestamp) { @@ -872,7 +872,7 @@ func (s *upgradeScenarioSuite) assertSuccessfulConfigStopExperiment(timestamp ho ) state := s.host.State() - state.AssertSymlinkExists("/etc/datadog-packages/datadog-agent/experiment", "/etc/datadog-packages/datadog-agent/stable", "root", "root") + state.AssertSymlinkExists("/etc/datadog-agent/managed/datadog-agent/experiment", "/etc/datadog-agent/managed/datadog-agent/stable", 
"root", "root") } func (s *upgradeScenarioSuite) getInstallerStatus() installerStatus { diff --git a/test/new-e2e/tests/otel/utils/config_utils.go b/test/new-e2e/tests/otel/utils/config_utils.go index a4daa89e3baa8..ddee2793648fa 100644 --- a/test/new-e2e/tests/otel/utils/config_utils.go +++ b/test/new-e2e/tests/otel/utils/config_utils.go @@ -21,7 +21,6 @@ import ( "k8s.io/apimachinery/pkg/fields" extension "github.com/DataDog/datadog-agent/comp/otelcol/ddflareextension/def" - "github.com/DataDog/datadog-agent/pkg/util/testutil/flake" "github.com/DataDog/datadog-agent/test/fakeintake/client/flare" ) @@ -105,7 +104,6 @@ func TestOTelFlareExtensionResponse(s OTelTestSuite, providedCfg string, fullCfg // TestOTelFlareFiles tests that the OTel Agent flares contain the expected files func TestOTelFlareFiles(s OTelTestSuite) { - flake.Mark(s.T()) err := s.Env().FakeIntake.Client().FlushServerAndResetAggregators() require.NoError(s.T(), err) agent := getAgentPod(s) diff --git a/test/regression/cases/quality_gate_logs/datadog-agent/conf.d/disk-listener.d/conf.yaml b/test/regression/cases/quality_gate_logs/datadog-agent/conf.d/disk-listener.d/conf.yaml new file mode 100644 index 0000000000000..ec51a59de1c46 --- /dev/null +++ b/test/regression/cases/quality_gate_logs/datadog-agent/conf.d/disk-listener.d/conf.yaml @@ -0,0 +1,5 @@ +logs: + - type: file + path: "/smp-shared/*.log" + service: "my-service" + source: "my-client-app" diff --git a/test/regression/cases/quality_gate_logs/datadog-agent/datadog.yaml b/test/regression/cases/quality_gate_logs/datadog-agent/datadog.yaml new file mode 100644 index 0000000000000..d5aa08d9135ed --- /dev/null +++ b/test/regression/cases/quality_gate_logs/datadog-agent/datadog.yaml @@ -0,0 +1,19 @@ +auth_token_file_path: /tmp/agent-auth-token + +# Disable cloud detection. This stops the Agent from poking around the +# execution environment & network. This is particularly important if the target +# has network access. 
+cloud_provider_metadata: [] + +dd_url: http://127.0.0.1:9091 + +logs_enabled: true +logs_config: + logs_dd_url: 127.0.0.1:9092 + logs_no_ssl: true + force_use_http: true + +process_config.process_dd_url: http://localhost:9093 + +telemetry.enabled: true +telemetry.checks: '*' diff --git a/test/regression/cases/quality_gate_logs/experiment.yaml b/test/regression/cases/quality_gate_logs/experiment.yaml new file mode 100644 index 0000000000000..b1b2d9ee9c02b --- /dev/null +++ b/test/regression/cases/quality_gate_logs/experiment.yaml @@ -0,0 +1,38 @@ +optimization_goal: cpu +erratic: false + +target: + name: datadog-agent + command: /bin/entrypoint.sh + cpu_allotment: 8 + memory_allotment: 4GiB + + environment: + DD_API_KEY: 00000001 + DD_HOSTNAME: smp-regression + + profiling_environment: + DD_INTERNAL_PROFILING_BLOCK_PROFILE_RATE: 10000 + DD_INTERNAL_PROFILING_CPU_DURATION: 1m + DD_INTERNAL_PROFILING_DELTA_PROFILES: true + DD_INTERNAL_PROFILING_ENABLED: true + DD_INTERNAL_PROFILING_ENABLE_GOROUTINE_STACKTRACES: true + DD_INTERNAL_PROFILING_MUTEX_PROFILE_FRACTION: 10 + DD_INTERNAL_PROFILING_PERIOD: 1m + DD_INTERNAL_PROFILING_UNIX_SOCKET: /var/run/datadog/apm.socket + DD_PROFILING_EXECUTION_TRACE_ENABLED: true + DD_PROFILING_EXECUTION_TRACE_PERIOD: 1m + DD_PROFILING_WAIT_PROFILE: true + +checks: + - name: memory_usage + description: "Memory usage" + bounds: + series: total_rss_bytes + upper_bound: 440MiB + + - name: lost_bytes + description: "Allowable bytes not polled by log Agent" + bounds: + series: lost_bytes + upper_bound: 0KiB diff --git a/test/regression/cases/quality_gate_logs/lading/lading.yaml b/test/regression/cases/quality_gate_logs/lading/lading.yaml new file mode 100644 index 0000000000000..44bd1dda27ef0 --- /dev/null +++ b/test/regression/cases/quality_gate_logs/lading/lading.yaml @@ -0,0 +1,27 @@ +generator: + - file_gen: + logrotate_fs: + seed: [2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, + 59, 61, 67, 71, 73, 79, 83, 89, 97, 101, 103, 
107, 109, 113, 127, 131] + load_profile: + constant: 500KiB + concurrent_logs: 2 + maximum_bytes_per_log: 50MiB + total_rotations: 5 + max_depth: 0 + variant: "ascii" + maximum_prebuild_cache_size_bytes: 300MiB + mount_point: /smp-shared + +blackhole: + - http: + binding_addr: "127.0.0.1:9091" + - http: + binding_addr: "127.0.0.1:9092" + response_delay_millis: 75 + - http: + binding_addr: "127.0.0.1:9093" + +target_metrics: + - prometheus: + uri: "http://127.0.0.1:5000/telemetry"