From d3f316473c3627a4a2d2862b191bf91bcef8b72f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hugo=20Beauz=C3=A9e-Luyssen?= Date: Wed, 17 Apr 2024 16:38:13 +0200 Subject: [PATCH] Enable omnibus build cache (#20117) Co-authored-by: alopezz Co-authored-by: Pythyu --- .github/CODEOWNERS | 1 + .gitlab-ci.yml | 1 + .gitlab/package_build/installer.yml | 7 +- .gitlab/package_build/windows.yml | 1 + .../config/software/datadog-agent-finalize.rb | 2 + .../datadog-agent-integrations-py2.rb | 2 + .../datadog-agent-integrations-py3.rb | 2 + omnibus/config/software/datadog-agent.rb | 2 + .../datadog-security-agent-policies.rb | 2 + omnibus/config/software/system-probe.rb | 2 + omnibus/omnibus.rb | 8 +- tasks/agent.py | 4 +- tasks/libs/common/omnibus.py | 174 ++++++++++++++++++ tasks/omnibus.py | 66 ++++++- 14 files changed, 268 insertions(+), 6 deletions(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 31925e1e39eeb..dec9ee9b60fb5 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -523,6 +523,7 @@ /tasks/components.py @DataDog/agent-shared-components /tasks/components_templates @DataDog/agent-shared-components /tasks/updater.py @DataDog/fleet +/tasks/libs/omnibus_cache.py @DataDog/agent-build-and-releases /tasks/installer.py @DataDog/fleet /test/ @DataDog/agent-developer-tools /test/benchmarks/ @DataDog/agent-metrics-logs diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 75d30569e8e5c..55347f159882d 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -149,6 +149,7 @@ variables: ## build to succeed with S3 caching disabled. S3_OMNIBUS_CACHE_BUCKET: dd-ci-datadog-agent-omnibus-cache-build-stable USE_S3_CACHING: --omnibus-s3-cache + OMNIBUS_GIT_CACHE_DIR: /tmp/omnibus-git-cache ## comment out the line below to disable integration wheels cache INTEGRATION_WHEELS_CACHE_BUCKET: dd-agent-omnibus S3_DD_AGENT_OMNIBUS_LLVM_URI: s3://dd-agent-omnibus/llvm diff --git a/.gitlab/package_build/installer.yml b/.gitlab/package_build/installer.yml index 1b889d267bb9f..6cf8e3d928dc7 100644 --- a/.gitlab/package_build/installer.yml +++ b/.gitlab/package_build/installer.yml @@ -21,7 +21,7 @@ - chmod 0744 /tmp/system-probe/clang-bpf /tmp/system-probe/llc-bpf # NOTE: for now, we consider "ociru" to be a "redhat_target" in omnibus/lib/ostools.rb # if we ever start building on a different platform, that might need to change - - inv -e omnibus.build --release-version "$RELEASE_VERSION" --major-version "$AGENT_MAJOR_VERSION" --python-runtimes "$PYTHON_RUNTIMES" --base-dir $OMNIBUS_BASE_DIR ${USE_S3_CACHING} --skip-deps --go-mod-cache="$GOPATH/pkg/mod" --system-probe-bin=/tmp/system-probe --host-distribution=ociru + - inv -e omnibus.build --release-version "$RELEASE_VERSION" --major-version "$AGENT_MAJOR_VERSION" --python-runtimes "$PYTHON_RUNTIMES" --base-dir $OMNIBUS_BASE_DIR ${USE_S3_CACHING} --skip-deps --go-mod-cache="$GOPATH/pkg/mod" --system-probe-bin=/tmp/system-probe --host-distribution=ociru --install-directory="$INSTALL_DIR" - ls -la $OMNIBUS_PACKAGE_DIR - !reference [.upload_sbom_artifacts] variables: @@ -94,7 +94,7 @@ datadog-agent-oci-arm64-a7: - rm -rf $OMNIBUS_PACKAGE_DIR/* # Artifacts and cache must live within project directory but we run omnibus in a neutral directory. # Thus, we move the artifacts at the end in a gitlab-friendly dir. - - inv -e omnibus.build --release-version "$RELEASE_VERSION" --base-dir $OMNIBUS_BASE_DIR ${USE_S3_CACHING} --skip-deps --go-mod-cache="$GOPATH/pkg/mod" --target-project="installer" + - inv -e omnibus.build --release-version "$RELEASE_VERSION" --base-dir $OMNIBUS_BASE_DIR ${USE_S3_CACHING} --skip-deps --go-mod-cache="$GOPATH/pkg/mod" --target-project="installer" ${INSTALL_DIR_PARAM} - ls -la $OMNIBUS_PACKAGE_DIR - $S3_CP_CMD $OMNIBUS_PACKAGE_DIR/datadog-installer-*-${PACKAGE_ARCH}.tar.xz $S3_ARTIFACTS_URI/$DESTINATION_FILE - !reference [.upload_sbom_artifacts] @@ -143,6 +143,8 @@ installer-amd64-oci: before_script: - source /root/.bashrc - export INSTALL_DIR=/opt/datadog-packages/datadog-installer/$(inv agent.version -u)-1 + - export INSTALL_DIR_PARAM="--install-directory=$INSTALL_DIR" + installer-arm64-oci: extends: installer-arm64 @@ -151,3 +153,4 @@ installer-arm64-oci: before_script: - source /root/.bashrc - export INSTALL_DIR=/opt/datadog-packages/datadog-installer/$(inv agent.version -u)-1 + - export INSTALL_DIR_PARAM="--install-directory=$INSTALL_DIR" diff --git a/.gitlab/package_build/windows.yml b/.gitlab/package_build/windows.yml index 547927fb50a18..6cd10d2e2ab0b 100644 --- a/.gitlab/package_build/windows.yml +++ b/.gitlab/package_build/windows.yml @@ -21,6 +21,7 @@ -e CI_JOB_NAME_SLUG=${CI_JOB_NAME_SLUG} -e CI_COMMIT_REF_NAME=${CI_COMMIT_REF_NAME} -e OMNIBUS_TARGET=${OMNIBUS_TARGET} + -e OMNIBUS_GIT_CACHE_DIR="C:\TEMP\omnibus-git-cache" -e WINDOWS_BUILDER=true -e RELEASE_VERSION="$RELEASE_VERSION" -e MAJOR_VERSION="$AGENT_MAJOR_VERSION" diff --git a/omnibus/config/software/datadog-agent-finalize.rb b/omnibus/config/software/datadog-agent-finalize.rb index 72e0e320c1284..f838390c5fe83 100644 --- a/omnibus/config/software/datadog-agent-finalize.rb +++ b/omnibus/config/software/datadog-agent-finalize.rb @@ -14,6 +14,8 @@ skip_transitive_dependency_licensing true +always_build true + build do license :project_license diff --git a/omnibus/config/software/datadog-agent-integrations-py2.rb b/omnibus/config/software/datadog-agent-integrations-py2.rb index ed4c43ac07060..5b0c15f776f54 100644 --- a/omnibus/config/software/datadog-agent-integrations-py2.rb +++ b/omnibus/config/software/datadog-agent-integrations-py2.rb @@ -21,6 +21,8 @@ source git: 'https://github.com/DataDog/integrations-core.git' +always_build true + integrations_core_version = ENV['INTEGRATIONS_CORE_VERSION'] if integrations_core_version.nil? || integrations_core_version.empty? integrations_core_version = 'master' diff --git a/omnibus/config/software/datadog-agent-integrations-py3.rb b/omnibus/config/software/datadog-agent-integrations-py3.rb index 7a7d9ddf81dd6..e3e6a7607aa42 100644 --- a/omnibus/config/software/datadog-agent-integrations-py3.rb +++ b/omnibus/config/software/datadog-agent-integrations-py3.rb @@ -21,6 +21,8 @@ source git: 'https://github.com/DataDog/integrations-core.git' +always_build true + integrations_core_version = ENV['INTEGRATIONS_CORE_VERSION'] if integrations_core_version.nil? || integrations_core_version.empty? integrations_core_version = 'master' diff --git a/omnibus/config/software/datadog-agent.rb b/omnibus/config/software/datadog-agent.rb index d3b7f162915d2..c97552522f296 100644 --- a/omnibus/config/software/datadog-agent.rb +++ b/omnibus/config/software/datadog-agent.rb @@ -18,6 +18,8 @@ source path: '..' relative_path 'src/github.com/DataDog/datadog-agent' +always_build true + build do license :project_license diff --git a/omnibus/config/software/datadog-security-agent-policies.rb b/omnibus/config/software/datadog-security-agent-policies.rb index 181c9aab15e88..8e20b797aa799 100644 --- a/omnibus/config/software/datadog-security-agent-policies.rb +++ b/omnibus/config/software/datadog-security-agent-policies.rb @@ -20,6 +20,8 @@ end default_version policies_version +always_build true + build do license "Apache-2.0" license_file "./LICENSE" diff --git a/omnibus/config/software/system-probe.rb b/omnibus/config/software/system-probe.rb index 6a042e080e7f9..de1ba341d3d51 100644 --- a/omnibus/config/software/system-probe.rb +++ b/omnibus/config/software/system-probe.rb @@ -8,6 +8,8 @@ source path: '..' relative_path 'src/github.com/DataDog/datadog-agent' +always_build true + build do license :project_license diff --git a/omnibus/omnibus.rb b/omnibus/omnibus.rb index 443342902ca65..626b6d43aec4c 100644 --- a/omnibus/omnibus.rb +++ b/omnibus/omnibus.rb @@ -37,4 +37,10 @@ s3_instance_profile true end end -use_git_caching false + +if not ENV.has_key?("OMNIBUS_GIT_CACHE_DIR") + use_git_caching false +else + use_git_caching true + git_cache_dir ENV["OMNIBUS_GIT_CACHE_DIR"] +end diff --git a/tasks/agent.py b/tasks/agent.py index 590a4a30b5920..79015cb0b7baf 100644 --- a/tasks/agent.py +++ b/tasks/agent.py @@ -322,8 +322,8 @@ def refresh_assets(_, build_tags, development=True, flavor=AgentFlavor.base.name # Ensure the config folders are not world writable os.chmod(check_dir, mode=0o755) - ## add additional windows-only corechecks, only on windows. Otherwise the check loader - ## on linux will throw an error because the module is not found, but the config is. + # add additional windows-only corechecks, only on windows. Otherwise the check loader + # on linux will throw an error because the module is not found, but the config is. if sys.platform == 'win32': for check in WINDOWS_CORECHECKS: check_dir = os.path.join(dist_folder, f"conf.d/{check}.d/") diff --git a/tasks/libs/common/omnibus.py b/tasks/libs/common/omnibus.py index 1c86021e8b7af..22d59f897fe49 100644 --- a/tasks/libs/common/omnibus.py +++ b/tasks/libs/common/omnibus.py @@ -1,9 +1,169 @@ +import hashlib import json import os import sys from datetime import datetime import requests +from release import _get_release_json_value + + +def _get_build_images(ctx): + # We intentionally include both build images & their test suffixes in the pattern + # as a test image and the merged version shouldn't share their cache + tags = ctx.run("grep -E 'DATADOG_AGENT_.*BUILDIMAGES' .gitlab-ci.yml | cut -d ':' -f 2", hide='stdout').stdout + return (t.strip() for t in tags.splitlines()) + + +def _get_omnibus_commits(field): + if 'RELEASE_VERSION' in os.environ: + release_version = os.environ['RELEASE_VERSION'] + else: + release_version = os.environ['RELEASE_VERSION_7'] + return _get_release_json_value(f'{release_version}::{field}') + + +def _get_environment_for_cache() -> dict: + """ + Compute a hash from the environment after excluding irrelevant/insecure + environment variables to ensure we don't omit a variable + """ + + def env_filter(item): + key = item[0] + excluded_prefixes = [ + 'AGENT_', + 'API_KEY_', + 'APP_KEY_', + 'ARTIFACTORY_', + 'AWS_', + 'BUILDENV_', + 'CI_', + 'CHOCOLATEY_', + 'CLUSTER_AGENT_', + 'DATADOG_AGENT_', + 'DD_', + 'DEB_', + 'DESTINATION_', + 'DOCKER_', + 'E2E_TESTS_', + 'EMISSARY_', + 'EXECUTOR_', + 'FF_', + 'GITLAB_', + 'GIT_', + 'JIRA_', + 'K8S_', + 'KITCHEN_', + 'KERNEL_MATRIX_TESTING_', + 'KUBERNETES_', + 'MACOS_GITHUB_', + 'OMNIBUS_', + 'POD_', + 'RELEASE_VERSION', + 'RPM_', + 'RUN_', + 'S3_', + 'SMP_', + 'SSH_', + 'TEST_INFRA_', + 'USE_', + 'VAULT_', + 'WINDOWS_', + ] + excluded_suffixes = [ + '_SHA256', + '_VERSION', + ] + excluded_values = [ + "AVAILABILITY_ZONE", + "BENCHMARKS_CI_IMAGE", + "BUCKET_BRANCH", + "BUNDLER_VERSION", + "CHANGELOG_COMMIT_SHA_SSM_NAME", + "CLANG_LLVM_VER", + "CHANNEL", + "CI", + "COMPUTERNAME" "CONSUL_HTTP_ADDR", + "DOGSTATSD_BINARIES_DIR", + "EXPERIMENTS_EVALUATION_ADDRESS", + "GCE_METADATA_HOST", + "GENERAL_ARTIFACTS_CACHE_BUCKET_URL", + "GET_SOURCES_ATTEMPTS", + "GO_TEST_SKIP_FLAKE", + "HOME", + "HOSTNAME", + "HOST_IP", + "INSTALL_SCRIPT_API_KEY_SSM_NAME", + "INTEGRATION_WHEELS_CACHE_BUCKET", + "IRBRC", + "KITCHEN_INFRASTRUCTURE_FLAKES_RETRY", + "LESSCLOSE", + "LESSOPEN", + "LC_CTYPE", + "LS_COLORS", + "MACOS_S3_BUCKET", + "MESSAGE", + "OLDPWD", + "PROCESS_S3_BUCKET", + "PWD", + "PYTHON_RUNTIMES", + "RESTORE_CACHE_ATTEMPTS", + "RUNNER_TEMP_PROJECT_DIR", + "RUSTC_SHA256", + "RUST_VERSION", + "SHLVL", + "STATIC_BINARIES_DIR", + "STATSD_URL", + "SYSTEM_PROBE_BINARIES_DIR", + "TRACE_AGENT_URL", + "USE_CACHING_PROXY_PYTHON", + "USE_CACHING_PROXY_RUBY", + "USE_S3_CACHING", + "USERDOMAIN", + "USERNAME", + "USERPROFILE", + "VCPKG_BLOB_SAS_URL_SSM_NAME", + "WIN_S3_BUCKET", + "WINGET_PAT_SSM_NAME", + "_", + "build_before", + ] + for p in excluded_prefixes: + if key.startswith(p): + return False + for s in excluded_suffixes: + if key.endswith(s): + return False + if key in excluded_values: + return False + return True + + return dict(filter(env_filter, sorted(os.environ.items()))) + + +def omnibus_compute_cache_key(ctx): + print('Computing cache key') + h = hashlib.sha1() + omnibus_last_commit = ctx.run('git log -n 1 --pretty=format:%H omnibus/', hide='stdout').stdout + h.update(str.encode(omnibus_last_commit)) + print(f'\tLast omnibus commit is {omnibus_last_commit}') + buildimages_hash = _get_build_images(ctx) + for img_hash in buildimages_hash: + h.update(str.encode(img_hash)) + omnibus_ruby_commit = _get_omnibus_commits('OMNIBUS_RUBY_VERSION') + omnibus_software_commit = _get_omnibus_commits('OMNIBUS_SOFTWARE_VERSION') + print(f'Omnibus ruby commit: {omnibus_ruby_commit}') + print(f'Omnibus software commit: {omnibus_software_commit}') + h.update(str.encode(omnibus_ruby_commit)) + h.update(str.encode(omnibus_software_commit)) + environment = _get_environment_for_cache() + for k, v in environment.items(): + print(f'\tUsing environment variable {k} to compute cache key') + h.update(str.encode(f'{k}={v}')) + cache_key = h.hexdigest() + print(f'Cache key: {cache_key}') + return cache_key def should_retry_bundle_install(res): @@ -110,3 +270,17 @@ def send_build_metrics(ctx, overall_duration): else: print(f'Failed to send build metrics to DataDog: {r.status_code}') print(r.text) + + +def install_dir_for_project(project): + if project == "agent" or project == "iot-agent": + folder = 'datadog-agent' + elif project == 'dogstatsd': + folder = 'datadog-dogstatsd' + elif project == 'agentless-scanner': + folder = os.path.join('datadog', 'agentless-scanner') + elif project == 'installer': + folder = 'datadog-installer' + else: + raise NotImplementedError(f'Unknown project {project}') + return os.path.join('opt', folder) diff --git a/tasks/omnibus.py b/tasks/omnibus.py index 11b1889a41f79..cd16a6b7dc4f6 100644 --- a/tasks/omnibus.py +++ b/tasks/omnibus.py @@ -5,7 +5,12 @@ from tasks.flavor import AgentFlavor from tasks.go import deps -from tasks.libs.common.omnibus import send_build_metrics, should_retry_bundle_install +from tasks.libs.common.omnibus import ( + install_dir_for_project, + omnibus_compute_cache_key, + send_build_metrics, + should_retry_bundle_install, +) from tasks.libs.common.utils import get_version, load_release_versions, timed from tasks.ssm import get_pfx_pass, get_signing_cert @@ -137,6 +142,9 @@ def get_omnibus_env( env['DEPLOY_AGENT'] = os.environ.get('DEPLOY_AGENT') if 'PACKAGE_ARCH' in os.environ: env['PACKAGE_ARCH'] = os.environ.get('PACKAGE_ARCH') + if 'INSTALL_DIR' in os.environ: + print('Forwarding INSTALL_DIR') + env['INSTALL_DIR'] = os.environ.get('INSTALL_DIR') return env @@ -167,6 +175,7 @@ def build( python_mirror=None, pip_config_file="pip.conf", host_distribution=None, + install_directory=None, target_project=None, ): """ @@ -206,6 +215,8 @@ def build( elif agent_binaries: target_project = "agent-binaries" + aws_cmd = "aws.cmd" if sys.platform == 'win32' else "aws" + # Get the python_mirror from the PIP_INDEX_URL environment variable if it is not passed in the args python_mirror = python_mirror or os.environ.get("PIP_INDEX_URL") @@ -219,6 +230,43 @@ def build( with timed(quiet=True) as bundle_elapsed: bundle_install_omnibus(ctx, gem_path, env) + omnibus_cache_dir = os.environ.get('OMNIBUS_GIT_CACHE_DIR') + use_omnibus_git_cache = omnibus_cache_dir is not None + if use_omnibus_git_cache: + # The cache will be written in the provided cache dir (see omnibus.rb) but + # the git repository itself will be located in a subfolder that replicates + # the install_dir hierarchy + # For instance if git_cache_dir is set to "/git/cache/dir" and install_dir is + # set to /a/b/c, the cache git repository will be located in + # /git/cache/dir/a/b/c/.git + if install_directory is None: + install_directory = install_dir_for_project(target_project) + # Is the path starts with a /, it's considered the new root for the joined path + # which effectively drops whatever was in omnibus_cache_dir + install_directory = install_directory.lstrip('/') + omnibus_cache_dir = os.path.join(omnibus_cache_dir, install_directory) + remote_cache_name = os.environ.get('CI_JOB_NAME_SLUG') + # We don't want to update the cache when not running on a CI + # Individual developers are still able to leverage the cache by providing + # the OMNIBUS_GIT_CACHE_DIR env variable, but they won't pull from the CI + # generated one. + use_remote_cache = remote_cache_name is not None + if use_remote_cache: + cache_state = None + cache_key = omnibus_compute_cache_key(ctx) + git_cache_url = f"s3://{os.environ['S3_OMNIBUS_CACHE_BUCKET']}/builds/{cache_key}/{remote_cache_name}" + bundle_path = ( + "/tmp/omnibus-git-cache-bundle" if sys.platform != 'win32' else "C:\\TEMP\\omnibus-git-cache-bundle" + ) + with timed(quiet=True) as restore_cache: + # Allow failure in case the cache was evicted + if ctx.run(f"{aws_cmd} s3 cp --only-show-errors {git_cache_url} {bundle_path}", warn=True): + print(f'Successfully restored cache {cache_key}') + ctx.run(f"git clone --mirror {bundle_path} {omnibus_cache_dir}") + cache_state = ctx.run(f"git -C {omnibus_cache_dir} tag -l").stdout + else: + print(f'Failed to restore cache from key {cache_key}') + with timed(quiet=True) as omnibus_elapsed: omnibus_run_task( ctx=ctx, @@ -234,11 +282,27 @@ def build( # Delete the temporary pip.conf file once the build is done os.remove(pip_config_file) + if use_omnibus_git_cache: + stale_tags = ctx.run(f'git -C {omnibus_cache_dir} tag --no-merged', warn=True).stdout + # Purge the cache manually as omnibus will stick to not restoring a tag when + # a mismatch is detected, but will keep the old cached tags. + # Do this before checking for tag differences, in order to remove staled tags + # in case they were included in the bundle in a previous build + for _, tag in enumerate(stale_tags.split(os.linesep)): + ctx.run(f'git -C {omnibus_cache_dir} tag -d {tag}') + if use_remote_cache and ctx.run(f"git -C {omnibus_cache_dir} tag -l").stdout != cache_state: + with timed(quiet=True) as update_cache: + ctx.run(f"git -C {omnibus_cache_dir} bundle create {bundle_path} --tags") + ctx.run(f"{aws_cmd} s3 cp --only-show-errors {bundle_path} {git_cache_url}") + print("Build component timing:") if not skip_deps: print(f"Deps: {deps_elapsed.duration}") print(f"Bundle: {bundle_elapsed.duration}") print(f"Omnibus: {omnibus_elapsed.duration}") + if use_omnibus_git_cache and use_remote_cache: + print(f"Restoring omnibus cache: {restore_cache.duration}") + print(f"Updating omnibus cache: {update_cache.duration}") send_build_metrics(ctx, omnibus_elapsed.duration)