From e0be8f93bcc18cfd454bf217bd8ee55439edc666 Mon Sep 17 00:00:00 2001 From: Ricky Stewart Date: Tue, 9 Nov 2021 10:16:48 -0600 Subject: [PATCH] ci: add bazel roachtest gce teamcity job Release note: None --- build/bazelbuilder/Dockerfile | 7 ++ build/teamcity-bazel-support.sh | 4 +- build/teamcity-nightly-roachtest.sh | 80 +------------------ .../nightlies/roachtest_nightly_gce.sh | 11 +++ .../nightlies/roachtest_nightly_impl.sh | 46 +++++++++++ build/teamcity/util/roachtest_util.sh | 74 +++++++++++++++++ pkg/cmd/roachtest/main.go | 29 +++++-- pkg/cmd/roachtest/test_impl.go | 4 + pkg/cmd/roachtest/test_runner.go | 11 +-- 9 files changed, 175 insertions(+), 91 deletions(-) create mode 100755 build/teamcity/cockroach/nightlies/roachtest_nightly_gce.sh create mode 100755 build/teamcity/cockroach/nightlies/roachtest_nightly_impl.sh create mode 100644 build/teamcity/util/roachtest_util.sh diff --git a/build/bazelbuilder/Dockerfile b/build/bazelbuilder/Dockerfile index 53cac7f654c9..07868031b831 100644 --- a/build/bazelbuilder/Dockerfile +++ b/build/bazelbuilder/Dockerfile @@ -19,6 +19,7 @@ RUN apt-get update \ make \ netbase \ openjdk-8-jre \ + openssh-client \ patchelf \ unzip \ && update-alternatives --install /usr/bin/clang clang /usr/bin/clang-10 100 \ @@ -49,6 +50,12 @@ RUN apt-get update && \ cd .. 
&& \ rm -rf git-2.29.2.zip git-2.29.2 +RUN curl -fsSL https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add - \ + && echo 'deb https://packages.cloud.google.com/apt cloud-sdk main' | tee /etc/apt/sources.list.d/gcloud.list \ + && apt-get update \ + && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ + google-cloud-sdk + RUN apt-get purge -y \ apt-transport-https \ flex \ diff --git a/build/teamcity-bazel-support.sh b/build/teamcity-bazel-support.sh index c2d3f506a545..140610a2019c 100644 --- a/build/teamcity-bazel-support.sh +++ b/build/teamcity-bazel-support.sh @@ -1,10 +1,11 @@ # FYI: You can run `./dev builder` to run this Docker image. :) # `dev` depends on this variable! Don't change the name or format unless you # also update `dev` accordingly. -BAZEL_IMAGE=cockroachdb/bazel:20211008-130456 +BAZEL_IMAGE=cockroachdb/bazel:20211109-174337 # Call `run_bazel $NAME_OF_SCRIPT` to start an appropriately-configured Docker # container with the `cockroachdb/bazel` image running the given script. +# BAZEL_SUPPORT_EXTRA_DOCKER_ARGS will be passed on to `docker run` unchanged. run_bazel() { if [ -z "${root:-}" ] then @@ -28,6 +29,7 @@ run_bazel() { docker run -i ${tty-} --rm --init \ -u "$(id -u):$(id -g)" \ --workdir="/go/src/github.com/cockroachdb/cockroach" \ + ${BAZEL_SUPPORT_EXTRA_DOCKER_ARGS:+$BAZEL_SUPPORT_EXTRA_DOCKER_ARGS} \ ${vols} \ $BAZEL_IMAGE "$@" } diff --git a/build/teamcity-nightly-roachtest.sh b/build/teamcity-nightly-roachtest.sh index 70b32a75d860..f1fde0bf9bb7 100755 --- a/build/teamcity-nightly-roachtest.sh +++ b/build/teamcity-nightly-roachtest.sh @@ -23,84 +23,8 @@ export PATH=$PATH:$(GOFLAGS=; go env GOPATH)/bin build/builder/mkrelease.sh amd64-linux-gnu build bin/workload bin/roachtest bin/roachprod \ > "${artifacts}/build.txt" 2>&1 || (cat "${artifacts}/build.txt"; false) -# Set up Google credentials. Note that we need this for all clouds since we upload -# perf artifacts to Google Storage at the end. 
-if [[ "$GOOGLE_EPHEMERAL_CREDENTIALS" ]]; then - echo "$GOOGLE_EPHEMERAL_CREDENTIALS" > creds.json - gcloud auth activate-service-account --key-file=creds.json - export ROACHPROD_USER=teamcity -else - echo 'warning: GOOGLE_EPHEMERAL_CREDENTIALS not set' >&2 - echo "Assuming that you've run \`gcloud auth login\` from inside the builder." >&2 -fi - -# Early bind the stats dir. Roachtest invocations can take ages, and we want the -# date at the time of the start of the run (which identifies the version of the -# code run best). -stats_dir="$(date +"%Y%m%d")-${TC_BUILD_ID}" - -# Set up a function we'll invoke at the end. -function upload_stats { - if tc_release_branch; then - bucket="cockroach-nightly-${CLOUD}" - if [[ "${CLOUD}" == "gce" ]]; then - # GCE, having been there first, gets an exemption. - bucket="cockroach-nightly" - fi - - remote_artifacts_dir="artifacts-${TC_BUILD_BRANCH}" - if [[ "${TC_BUILD_BRANCH}" == "master" ]]; then - # The master branch is special, as roachperf hard-codes - # the location. - remote_artifacts_dir="artifacts" - fi - - # The stats.json files need some path translation: - # ${artifacts}/path/to/test/stats.json - # to - # gs://${bucket}/artifacts/${stats_dir}/path/to/test/stats.json - # - # `find` below will expand "{}" as ./path/to/test/stats.json. We need - # to bend over backwards to remove the `./` prefix or gsutil will have - # a `.` folder in ${stats_dir}, which we don't want. - (cd "${artifacts}" && \ - while IFS= read -r f; do - if [[ -n "${f}" ]]; then - gsutil cp "${f}" "gs://${bucket}/${remote_artifacts_dir}/${stats_dir}/${f}" - fi - done <<< "$(find . -name stats.json | sed 's/^\.\///')") - fi -} - -# Upload any stats.json we can find, no matter what happens. -trap upload_stats EXIT - -# Set up the parameters for the roachtest invocation. -PARALLELISM=16 -CPUQUOTA=1024 -ZONES="" -TESTS="" -case "${CLOUD}" in - gce) - # We specify --zones below so that nodes are created in us-central1-b by - # default. 
This reserves us-east1-b (the roachprod default zone) for use by - # manually created clusters. - ZONES="us-central1-b,us-west1-b,europe-west2-b" - ;; - aws) - PARALLELISM=3 - CPUQUOTA=384 - if [ -z "${TESTS}" ]; then - # NB: anchor ycsb to beginning of line to avoid matching `zfs/ycsb/*` which - # isn't supported on AWS at time of writing. - TESTS="kv(0|95)|^ycsb|tpcc/(headroom/n4cpu16)|tpccbench/(nodes=3/cpu=16)|scbench/randomload/(nodes=3/ops=2000/conc=1)|backup/(KMS/n3cpu4)" - fi - ;; - *) - echo "unknown cloud ${CLOUD}" - exit 1 - ;; -esac +# Set up GCE authentication, artifact upload logic, and the PARALLELISM/CPUQUOTA/TESTS env variables. +source $root/build/teamcity/util/roachtest_util.sh build/teamcity-roachtest-invoke.sh \ --cloud="${CLOUD}" \ diff --git a/build/teamcity/cockroach/nightlies/roachtest_nightly_gce.sh b/build/teamcity/cockroach/nightlies/roachtest_nightly_gce.sh new file mode 100755 index 000000000000..ce1646276b13 --- /dev/null +++ b/build/teamcity/cockroach/nightlies/roachtest_nightly_gce.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash + +set -exuo pipefail + +dir="$(dirname $(dirname $(dirname $(dirname "${0}"))))" + +source "$dir/teamcity-support.sh" # For $root +source "$dir/teamcity-bazel-support.sh" # For run_bazel + +BAZEL_SUPPORT_EXTRA_DOCKER_ARGS="-e LITERAL_ARTIFACTS_DIR=$root/artifacts -e BUILD_TAG -e CLOUD -e COCKROACH_DEV_LICENSE -e COUNT -e GOOGLE_EPHEMERAL_CREDENTIALS -e SLACK_TOKEN -e TC_BUILD_BRANCH -e TC_BUILD_ID" \ + run_bazel build/teamcity/cockroach/nightlies/roachtest_nightly_impl.sh diff --git a/build/teamcity/cockroach/nightlies/roachtest_nightly_impl.sh b/build/teamcity/cockroach/nightlies/roachtest_nightly_impl.sh new file mode 100755 index 000000000000..d1dba69d2e4d --- /dev/null +++ b/build/teamcity/cockroach/nightlies/roachtest_nightly_impl.sh @@ -0,0 +1,46 @@ +#!/usr/bin/env bash + +set -exuo pipefail + +dir="$(dirname $(dirname $(dirname $(dirname "${0}"))))" + +source "$dir/teamcity-support.sh" + +if [[ ! 
-f ~/.ssh/id_rsa.pub ]]; then + ssh-keygen -q -C "roachtest-nightly-bazel $(date)" -N "" -f ~/.ssh/id_rsa +fi + +bazel build --config crosslinux --config ci --config with_ui -c opt \ + //pkg/cmd/cockroach //pkg/cmd/workload //pkg/cmd/roachtest \ + //pkg/cmd/roachprod //c-deps:libgeos +BAZEL_BIN=$(bazel info bazel-bin --config crosslinux --config ci --config with_ui -c opt) +# Move this stuff to bin for simplicity. +mkdir -p bin +chmod o+rwx bin +cp $BAZEL_BIN/pkg/cmd/cockroach/cockroach_/cockroach bin +cp $BAZEL_BIN/pkg/cmd/roachprod/roachprod_/roachprod bin +cp $BAZEL_BIN/pkg/cmd/roachtest/roachtest_/roachtest bin +cp $BAZEL_BIN/pkg/cmd/workload/workload_/workload bin +chmod a+w bin/cockroach bin/roachprod bin/roachtest bin/workload +# Stage the geos libs in the appropriate spot. +mkdir -p lib.docker_amd64 +chmod o+rwx lib.docker_amd64 +cp $BAZEL_BIN/c-deps/libgeos/lib/libgeos.so lib.docker_amd64 +cp $BAZEL_BIN/c-deps/libgeos/lib/libgeos_c.so lib.docker_amd64 +chmod a+w lib.docker_amd64/libgeos.so lib.docker_amd64/libgeos_c.so + +artifacts=/artifacts +source $root/build/teamcity/util/roachtest_util.sh + +build/teamcity-roachtest-invoke.sh \ + --cloud="${CLOUD}" \ + --count="${COUNT-1}" \ + --parallelism="${PARALLELISM}" \ + --cpu-quota="${CPUQUOTA}" \ + --cluster-id="${TC_BUILD_ID}" \ + --build-tag="${BUILD_TAG}" \ + --cockroach="${PWD}/bin/cockroach" \ + --artifacts=/artifacts \ + --artifacts-literal="${LITERAL_ARTIFACTS_DIR:-}" \ + --slack-token="${SLACK_TOKEN}" \ + "${TESTS}" diff --git a/build/teamcity/util/roachtest_util.sh b/build/teamcity/util/roachtest_util.sh new file mode 100644 index 000000000000..a5971d178963 --- /dev/null +++ b/build/teamcity/util/roachtest_util.sh @@ -0,0 +1,74 @@ +# Common logic used by the nightly roachtest scripts (Bazel and non-Bazel). + +# Set up Google credentials. Note that we need this for all clouds since we upload +# perf artifacts to Google Storage at the end. 
+if [[ "${GOOGLE_EPHEMERAL_CREDENTIALS:-}" ]]; then + echo "$GOOGLE_EPHEMERAL_CREDENTIALS" > creds.json + gcloud auth activate-service-account --key-file=creds.json + export ROACHPROD_USER=teamcity +else + echo 'warning: GOOGLE_EPHEMERAL_CREDENTIALS not set' >&2 + echo "Assuming that you've run \`gcloud auth login\` from inside the builder." >&2 +fi + +# Early bind the stats dir. Roachtest invocations can take ages, and we want the +# date at the time of the start of the run (which identifies the version of the +# code run best). +stats_dir="$(date +"%Y%m%d")-${TC_BUILD_ID}" + +# Set up a function we'll invoke at the end. +function upload_stats { + if tc_release_branch; then + bucket="cockroach-nightly-${CLOUD}" + if [[ "${CLOUD}" == "gce" ]]; then + # GCE, having been there first, gets an exemption. + bucket="cockroach-nightly" + fi + + remote_artifacts_dir="artifacts-${TC_BUILD_BRANCH}" + if [[ "${TC_BUILD_BRANCH}" == "master" ]]; then + # The master branch is special, as roachperf hard-codes + # the location. + remote_artifacts_dir="artifacts" + fi + + # The stats.json files need some path translation: + # ${artifacts}/path/to/test/stats.json + # to + # gs://${bucket}/${remote_artifacts_dir}/${stats_dir}/path/to/test/stats.json + # + # `find` below will expand "{}" as ./path/to/test/stats.json. We need + # to bend over backwards to remove the `./` prefix or gsutil will have + # a `.` folder in ${stats_dir}, which we don't want. + (cd "${artifacts}" && \ + while IFS= read -r f; do + if [[ -n "${f}" ]]; then + gsutil cp "${f}" "gs://${bucket}/${remote_artifacts_dir}/${stats_dir}/${f}" + fi + done <<< "$(find . -name stats.json | sed 's/^\.\///')") + fi +} + +# Upload any stats.json we can find, no matter what happens. +trap upload_stats EXIT + +# Set up the parameters for the roachtest invocation. 
+PARALLELISM=16 +CPUQUOTA=1024 +TESTS="" +case "${CLOUD}" in + gce) ;; # GCE keeps the defaults set above. + aws) + PARALLELISM=3 + CPUQUOTA=384 + if [ -z "${TESTS}" ]; then + # NB: anchor ycsb to beginning of line to avoid matching `zfs/ycsb/*` which + # isn't supported on AWS at time of writing. + TESTS="kv(0|95)|^ycsb|tpcc/(headroom/n4cpu16)|tpccbench/(nodes=3/cpu=16)|scbench/randomload/(nodes=3/ops=2000/conc=1)|backup/(KMS/n3cpu4)" + fi + ;; + *) + echo "unknown cloud ${CLOUD}" + exit 1 + ;; +esac diff --git a/pkg/cmd/roachtest/main.go b/pkg/cmd/roachtest/main.go index 7f4470d3be05..5545fba33a36 100644 --- a/pkg/cmd/roachtest/main.go +++ b/pkg/cmd/roachtest/main.go @@ -47,6 +47,10 @@ func main() { // Path to a local dir where the test logs and artifacts collected from // cluster will be placed. var artifacts string + // Path to the literal on-agent directory where artifacts are stored. + // May be different from `artifacts`. Only used for messages to + // ##teamcity[publishArtifacts] in Teamcity mode. + var literalArtifacts string var httpPort int var debugEnabled bool var clusterID string @@ -173,6 +177,9 @@ failed, it is 10. Any other exit status reports a problem with the test runner itself. `, RunE: func(_ *cobra.Command, args []string) error { + if literalArtifacts == "" { + literalArtifacts = artifacts + } return runTests(tests.RegisterTests, cliCfg{ args: args, count: count, @@ -181,6 +188,7 @@ runner itself. httpPort: httpPort, parallelism: parallelism, artifactsDir: artifacts, + literalArtifactsDir: literalArtifacts, user: username, clusterID: clusterID, versionsBinaryOverride: versionsBinaryOverride, @@ -208,6 +216,9 @@ runner itself. 
Short: "run automated benchmarks on cockroach cluster", Long: `Run automated benchmarks on existing or ephemeral cockroach clusters.`, RunE: func(_ *cobra.Command, args []string) error { + if literalArtifacts == "" { + literalArtifacts = artifacts + } return runTests(tests.RegisterBenchmarks, cliCfg{ args: args, count: count, @@ -227,6 +238,8 @@ runner itself. for _, cmd := range []*cobra.Command{runCmd, benchCmd} { cmd.Flags().StringVar( &artifacts, "artifacts", "artifacts", "path to artifacts directory") + cmd.Flags().StringVar( + &literalArtifacts, "artifacts-literal", "", "literal path to on-agent artifacts directory. Used for messages to ##teamcity[publishArtifacts] in --teamcity mode. May be different from --artifacts; defaults to the value of --artifacts if not provided") cmd.Flags().StringVar( &cloud, "cloud", cloud, "cloud provider to use (aws, azure, or gce)") cmd.Flags().StringVar( @@ -288,6 +301,7 @@ type cliCfg struct { httpPort int parallelism int artifactsDir string + literalArtifactsDir string user string clusterID string versionsBinaryOverride map[string]string @@ -340,12 +354,13 @@ func runTests(register func(registry.Registry), cfg cliCfg) error { runnerDir, fmt.Sprintf("test_runner-%d.log", timeutil.Now().Unix())) l, tee := testRunnerLogger(context.Background(), cfg.parallelism, runnerLogPath) lopt := loggingOpt{ - l: l, - tee: tee, - stdout: os.Stdout, - stderr: os.Stderr, - artifactsDir: cfg.artifactsDir, - runnerLogPath: runnerLogPath, + l: l, + tee: tee, + stdout: os.Stdout, + stderr: os.Stderr, + artifactsDir: cfg.artifactsDir, + literalArtifactsDir: cfg.literalArtifactsDir, + runnerLogPath: runnerLogPath, } // We're going to run all the workers (and thus all the tests) in a context @@ -367,7 +382,7 @@ func runTests(register func(registry.Registry), cfg cliCfg) error { if teamCity { // Collect the runner logs. 
- fmt.Printf("##teamcity[publishArtifacts '%s']\n", runnerDir) + fmt.Printf("##teamcity[publishArtifacts '%s']\n", filepath.Join(cfg.literalArtifactsDir, runnerLogsDir)) } return err } diff --git a/pkg/cmd/roachtest/test_impl.go b/pkg/cmd/roachtest/test_impl.go index 5baf783884cb..add6f44d8506 100644 --- a/pkg/cmd/roachtest/test_impl.go +++ b/pkg/cmd/roachtest/test_impl.go @@ -482,6 +482,10 @@ type loggingOpt struct { // artifactsDir is that path to the dir that will contain the artifacts for // all the tests. artifactsDir string + // path to the literal on-agent directory where artifacts are stored. May + // be different from artifactsDir since the roachtest may be running in + // a container. + literalArtifactsDir string // runnerLogPath is that path to the runner's log file. runnerLogPath string } diff --git a/pkg/cmd/roachtest/test_runner.go b/pkg/cmd/roachtest/test_runner.go index 78b5ce83dbdf..1a0c95ff7d93 100644 --- a/pkg/cmd/roachtest/test_runner.go +++ b/pkg/cmd/roachtest/test_runner.go @@ -302,7 +302,7 @@ func (r *testRunner) Run( ctx, fmt.Sprintf("w%d", i) /* name */, r.work, qp, stopper.ShouldQuiesce(), clustersOpt.keepClustersOnTestFailure, - lopt.artifactsDir, lopt.runnerLogPath, lopt.tee, lopt.stdout, + lopt.artifactsDir, lopt.literalArtifactsDir, lopt.runnerLogPath, lopt.tee, lopt.stdout, allocateCluster, topt, l, @@ -368,6 +368,8 @@ type clusterAllocatorFn func( // name: The worker's name, to be used as a prefix for log messages. // artifactsRootDir: The artifacts dir. Each test's logs are going to be under a // run_ dir. If empty, test log files will not be created. +// literalArtifactsDir: The literal on-agent path where artifacts are stored. +// Only used for teamcity[publishArtifacts] messages. // testRunnerLogPath: The path to the test runner's log. It will be copied to // failing tests' artifacts dir if running under TeamCity. // stdout: The Writer to use for messages that need to go to stdout (e.g. 
the @@ -382,6 +384,7 @@ func (r *testRunner) runWorker( interrupt <-chan struct{}, debug bool, artifactsRootDir string, + literalArtifactsDir string, testRunnerLogPath string, teeOpt logger.TeeOptType, stdout io.Writer, @@ -466,15 +469,13 @@ func (r *testRunner) runWorker( escapedTestName := teamCityNameEscape(testToRun.spec.Name) runSuffix := "run_" + strconv.Itoa(testToRun.runNum) - base := filepath.Join(artifactsRootDir, escapedTestName) - - artifactsDir = filepath.Join(base, runSuffix) + artifactsDir = filepath.Join(filepath.Join(artifactsRootDir, escapedTestName), runSuffix) logPath = filepath.Join(artifactsDir, "test.log") // Map artifacts/TestFoo/run_?/** => TestFoo/run_?/**, i.e. collect the artifacts // for this test exactly as they are laid out on disk (when the time // comes). - artifactsSpec = fmt.Sprintf("%s/%s/** => %s/%s", base, runSuffix, escapedTestName, runSuffix) + artifactsSpec = fmt.Sprintf("%s/%s/** => %s/%s", filepath.Join(literalArtifactsDir, escapedTestName), runSuffix, escapedTestName, runSuffix) } testL, err := logger.RootLogger(logPath, teeOpt) if err != nil {