diff --git a/build/bazelutil/check.sh b/build/bazelutil/check.sh index 55e15539696c..ca66f8ea141d 100755 --- a/build/bazelutil/check.sh +++ b/build/bazelutil/check.sh @@ -53,7 +53,7 @@ pkg/util/timeutil/zoneinfo.go://go:generate go run gen/main.go EXISTING_BROKEN_TESTS_IN_BAZEL=" pkg/acceptance/BUILD.bazel pkg/cmd/cockroach-oss/BUILD.bazel -pkg/cmd/github-post/BUILD.bazel +pkg/cmd/bazci/githubpost/BUILD.bazel pkg/cmd/prereqs/BUILD.bazel pkg/cmd/roachtest/BUILD.bazel pkg/cmd/teamcity-trigger/BUILD.bazel diff --git a/build/teamcity-bazel-support.sh b/build/teamcity-bazel-support.sh index d6acf0289f3c..d76670bb043f 100644 --- a/build/teamcity-bazel-support.sh +++ b/build/teamcity-bazel-support.sh @@ -47,71 +47,3 @@ _tc_release_branch() { branch=$(_tc_build_branch) [[ "$branch" == master || "$branch" == release-* || "$branch" == provisional_* ]] } - -# process_test_json processes logs and submits failures to GitHub -# Requires GITHUB_API_TOKEN set for the release branches. -# Accepts 5 arguments: -# testfilter: path to the `testfilter` executable, usually -# `$BAZEL_BIN/pkg/cmd/testfilter/testfilter_/testfilter` -# github_post: path to the `github-post` executable, usually -# `$BAZEL_BIN/pkg/cmd/github-post/github-post_/github-post` -# artifacts_dir: usually `/artifacts` -# test_json: path to test's JSON output, usually generated by `rules_go`'s and -# `GO_TEST_JSON_OUTPUT_FILE`. -# create_tarball: whether to create a tarball with full logs. If the test's -# exit code is passed, the tarball is generated on failures. -# -# The variable BAZEL_SUPPORT_EXTRA_GITHUB_POST_ARGS can be set to add extra -# arguments to $github_post. 
-process_test_json() { - local testfilter=$1 - local github_post=$2 - local artifacts_dir=$3 - local test_json=$4 - local create_tarball=$5 - - $testfilter -mode=strip < "$test_json" | $testfilter -mode=omit | $testfilter -mode=convert > "$artifacts_dir"/failures.txt - failures_size=$(stat --format=%s "$artifacts_dir"/failures.txt) - if [ $failures_size = 0 ]; then - rm -f "$artifacts_dir"/failures.txt - fi - - if _tc_release_branch; then - if [ -z "${GITHUB_API_TOKEN-}" ]; then - # GITHUB_API_TOKEN must be in the env or github-post will barf if it's - # ever asked to post, so enforce that on all runs. - # The way this env var is made available here is quite tricky. The build - # calling this method is usually a build that is invoked from PRs, so it - # can't have secrets available to it (for the PR could modify - # build/teamcity-* to leak the secret). Instead, we provide the secrets - # to a higher-level job (Publish Bleeding Edge) and use TeamCity magic to - # pass that env var through when it's there. This means we won't have the - # env var on PR builds, but we'll have it for builds that are triggered - # from the release branches. - echo "GITHUB_API_TOKEN must be set" - exit 1 - else - $github_post ${BAZEL_SUPPORT_EXTRA_GITHUB_POST_ARGS:+$BAZEL_SUPPORT_EXTRA_GITHUB_POST_ARGS} < "$test_json" - fi - fi - - if [ "$create_tarball" -ne 0 ]; then - # Keep the debug file around for failed builds. Compress it to avoid - # clogging the agents with stuff we'll hopefully rarely ever need to - # look at. - # If the process failed, also save the full human-readable output. This is - # helpful in cases in which tests timed out, where it's difficult to blame - # the failure on any particular test. It's also a good alternative to poking - # around in test.json.txt itself when anything else we don't handle well happens, - # whatever that may be. 
- $testfilter -mode=convert < "$test_json" > "$artifacts_dir"/full_output.txt - (cd "$artifacts_dir" && tar --strip-components 1 -czf full_output.tgz full_output.txt $(basename $test_json)) - rm -rf "$artifacts_dir"/full_output.txt - fi - - # Some unit tests test automatic ballast creation. These ballasts can be - # larger than the maximum artifact size. Remove any artifacts with the - # EMERGENCY_BALLAST filename. - find "$artifacts_dir" -name "EMERGENCY_BALLAST" -delete -} - diff --git a/build/teamcity-testrace.sh b/build/teamcity-testrace.sh index 42f3095411c8..582ede05caa1 100755 --- a/build/teamcity-testrace.sh +++ b/build/teamcity-testrace.sh @@ -43,6 +43,10 @@ tc_end_block "Compile C dependencies" TESTTIMEOUT=${TESTTIMEOUT:-45m} for pkg in $pkgspec; do + # Skip known-bad tests (generally Bazel-specific ones). + if [[ "$pkg" == "./pkg/cmd/bazci" ]]; then + continue + fi tc_start_block "Run ${pkg} under race detector" run_json_test build/builder.sh env \ COCKROACH_LOGIC_TESTS_SKIP=true \ diff --git a/build/teamcity/cockroach/nightlies/cloud_unit_tests_impl.sh b/build/teamcity/cockroach/nightlies/cloud_unit_tests_impl.sh index c7aba0220c9e..ebce9d8528cf 100755 --- a/build/teamcity/cockroach/nightlies/cloud_unit_tests_impl.sh +++ b/build/teamcity/cockroach/nightlies/cloud_unit_tests_impl.sh @@ -3,14 +3,12 @@ set -xeuo pipefail dir="$(dirname $(dirname $(dirname $(dirname "${0}"))))" -source "$dir/teamcity-bazel-support.sh" # For process_test_json source "$dir/teamcity-support.sh" # For log_into_gcloud -bazel build //pkg/cmd/bazci //pkg/cmd/github-post //pkg/cmd/testfilter --config=ci +bazel build //pkg/cmd/bazci --config=ci BAZEL_BIN=$(bazel info bazel-bin --config=ci) ARTIFACTS_DIR=/artifacts -GO_TEST_JSON_OUTPUT_FILE=$ARTIFACTS_DIR/test.json.txt google_credentials="$GOOGLE_EPHEMERAL_CREDENTIALS" log_into_gcloud @@ -25,11 +23,10 @@ export AWS_CONFIG_FILE="$PWD/.aws/config" log_into_aws exit_status=0 -$BAZEL_BIN/pkg/cmd/bazci/bazci_/bazci test -- --config=ci \ 
+$BAZEL_BIN/pkg/cmd/bazci/bazci_/bazci --process_test_failures -- test --config=ci \ //pkg/cloud/gcp:gcp_test //pkg/cloud/amazon:amazon_test //pkg/ccl/cloudccl/gcp:gcp_test //pkg/ccl/cloudccl/amazon:amazon_test \ --test_env=GO_TEST_WRAP_TESTV=1 \ --test_env=GO_TEST_WRAP=1 \ - --test_env=GO_TEST_JSON_OUTPUT_FILE=$GO_TEST_JSON_OUTPUT_FILE \ --test_env=GOOGLE_CREDENTIALS_JSON="$GOOGLE_EPHEMERAL_CREDENTIALS" \ --test_env=GOOGLE_APPLICATION_CREDENTIALS="$GOOGLE_APPLICATION_CREDENTIALS" \ --test_env=GOOGLE_BUCKET="nightly-cloud-unit-tests" \ @@ -51,11 +48,4 @@ $BAZEL_BIN/pkg/cmd/bazci/bazci_/bazci test -- --config=ci \ --test_timeout=900 \ || exit_status=$? -process_test_json \ - $BAZEL_BIN/pkg/cmd/testfilter/testfilter_/testfilter \ - $BAZEL_BIN/pkg/cmd/github-post/github-post_/github-post \ - $ARTIFACTS_DIR \ - $GO_TEST_JSON_OUTPUT_FILE \ - $exit_status - exit $exit_status diff --git a/build/teamcity/cockroach/nightlies/compose.sh b/build/teamcity/cockroach/nightlies/compose.sh index 7049c37dc851..125f7ce05c41 100755 --- a/build/teamcity/cockroach/nightlies/compose.sh +++ b/build/teamcity/cockroach/nightlies/compose.sh @@ -8,7 +8,7 @@ source "$dir/teamcity-bazel-support.sh" tc_start_block "Run compose tests" -bazel build //pkg/cmd/bazci //pkg/cmd/github-post //pkg/cmd/testfilter --config=ci +bazel build //pkg/cmd/bazci --config=ci BAZEL_BIN=$(bazel info bazel-bin --config=ci) BAZCI=$BAZEL_BIN/pkg/cmd/bazci/bazci_/bazci @@ -18,24 +18,16 @@ COCKROACH=$CROSSBIN/pkg/cmd/cockroach/cockroach_/cockroach COMPAREBIN=$CROSSBIN/pkg/compose/compare/compare/compare_test_/compare_test ARTIFACTS_DIR=$PWD/artifacts mkdir -p $ARTIFACTS_DIR -GO_TEST_JSON_OUTPUT_FILE=$ARTIFACTS_DIR/test.json.txt exit_status=0 -$BAZCI --artifacts_dir=$ARTIFACTS_DIR -- \ +$BAZCI --process_test_failures --artifacts_dir=$ARTIFACTS_DIR -- \ test --config=ci //pkg/compose:compose_test \ "--sandbox_writable_path=$ARTIFACTS_DIR" \ "--test_tmpdir=$ARTIFACTS_DIR" \ --test_env=GO_TEST_WRAP_TESTV=1 \ - 
--test_env=GO_TEST_JSON_OUTPUT_FILE=$GO_TEST_JSON_OUTPUT_FILE \ --test_arg -cockroach --test_arg $COCKROACH \ --test_arg -compare --test_arg $COMPAREBIN \ --test_timeout=1800 || exit_status=$? -process_test_json \ - $BAZEL_BIN/pkg/cmd/testfilter/testfilter_/testfilter \ - $BAZEL_BIN/pkg/cmd/github-post/github-post_/github-post \ - $ARTIFACTS_DIR \ - $GO_TEST_JSON_OUTPUT_FILE \ - $exit_status tc_end_block "Run compose tests" exit $exit_status diff --git a/build/teamcity/cockroach/nightlies/lint_urls_impl.sh b/build/teamcity/cockroach/nightlies/lint_urls_impl.sh index b22468913307..c064fe472944 100755 --- a/build/teamcity/cockroach/nightlies/lint_urls_impl.sh +++ b/build/teamcity/cockroach/nightlies/lint_urls_impl.sh @@ -3,9 +3,8 @@ set -xeuo pipefail dir="$(dirname $(dirname $(dirname $(dirname "${0}"))))" -source "$dir/teamcity-bazel-support.sh" # For process_test_json -bazel build //pkg/cmd/bazci //pkg/cmd/github-post //pkg/cmd/testfilter --config=ci +bazel build //pkg/cmd/bazci --config=ci BAZEL_BIN=$(bazel info bazel-bin --config=ci) GO_TEST_JSON_OUTPUT_FILE=/artifacts/test.json.txt exit_status=0 @@ -15,10 +14,3 @@ XML_OUTPUT_FILE=/artifacts/test.xml GO_TEST_WRAP_TESTV=1 GO_TEST_WRAP=1 GO_TEST_ # The schema of the output test.xml will be slightly wrong -- ask `bazci` to fix # it up. 
$BAZEL_BIN/pkg/cmd/bazci/bazci_/bazci munge-test-xml /artifacts/test.xml -process_test_json \ - $BAZEL_BIN/pkg/cmd/testfilter/testfilter_/testfilter \ - $BAZEL_BIN/pkg/cmd/github-post/github-post_/github-post \ - /artifacts \ - $GO_TEST_JSON_OUTPUT_FILE \ - $exit_status -exit $exit_status diff --git a/build/teamcity/cockroach/nightlies/optimizer_tests_impl.sh b/build/teamcity/cockroach/nightlies/optimizer_tests_impl.sh index dac92c503286..b9e26d001ff0 100755 --- a/build/teamcity/cockroach/nightlies/optimizer_tests_impl.sh +++ b/build/teamcity/cockroach/nightlies/optimizer_tests_impl.sh @@ -3,27 +3,19 @@ set -xeuo pipefail dir="$(dirname $(dirname $(dirname $(dirname "${0}"))))" -source "$dir/teamcity-bazel-support.sh" source "$dir/teamcity/util.sh" -bazel build //pkg/cmd/bazci //pkg/cmd/github-post //pkg/cmd/testfilter --config=ci +bazel build //pkg/cmd/bazci --config=ci BAZEL_BIN=$(bazel info bazel-bin --config=ci) tc_start_block "Run opt tests with fast_int_set_large" ARTIFACTS_DIR=/artifacts/fast_int_set_large mkdir $ARTIFACTS_DIR -GO_TEST_JSON_OUTPUT_FILE=$ARTIFACTS_DIR/test.json.txt exit_status_large=0 -$BAZEL_BIN/pkg/cmd/bazci/bazci_/bazci --artifacts_dir $ARTIFACTS_DIR -- \ +$BAZEL_BIN/pkg/cmd/bazci/bazci_/bazci --artifacts_dir $ARTIFACTS_DIR --process_test_failures -- \ test //pkg/sql/opt:opt_test --config=ci \ --define gotags=bazel,crdb_test,fast_int_set_large \ - --test_env=GO_TEST_JSON_OUTPUT_FILE=$GO_TEST_JSON_OUTPUT_FILE || exit_status_large=$? -process_test_json \ - $BAZEL_BIN/pkg/cmd/testfilter/testfilter_/testfilter \ - $BAZEL_BIN/pkg/cmd/github-post/github-post_/github-post \ - $ARTIFACTS_DIR \ - $GO_TEST_JSON_OUTPUT_FILE \ - $exit_status_large + || exit_status_large=$? 
tc_end_block "Run opt tests with fast_int_set_large" # NOTE(ricky): Running both tests in the same configuration with different @@ -32,19 +24,12 @@ tc_end_block "Run opt tests with fast_int_set_large" tc_start_block "Run opt tests with fast_int_set_small" ARTIFACTS_DIR=/artifacts/fast_int_set_small mkdir $ARTIFACTS_DIR -GO_TEST_JSON_OUTPUT_FILE=$ARTIFACTS_DIR/test.json.txt exit_status_small=0 -$BAZEL_BIN/pkg/cmd/bazci/bazci_/bazci --artifacts_dir $ARTIFACTS_DIR -- \ +$BAZEL_BIN/pkg/cmd/bazci/bazci_/bazci --artifacts_dir $ARTIFACTS_DIR --process_test_failures -- \ test --config=ci \ //pkg/sql/opt:opt_test \ --define gotags=bazel,crdb_test,fast_int_set_small \ - --test_env=GO_TEST_JSON_OUTPUT_FILE=$GO_TEST_JSON_OUTPUT_FILE || exit_status_small=$? -process_test_json \ - $BAZEL_BIN/pkg/cmd/testfilter/testfilter_/testfilter \ - $BAZEL_BIN/pkg/cmd/github-post/github-post_/github-post \ - $ARTIFACTS_DIR \ - $GO_TEST_JSON_OUTPUT_FILE \ - $exit_status_large + || exit_status_small=$? tc_end_block "Run opt tests with fast_int_set_small" if [ $exit_status_large -ne 0 ] diff --git a/build/teamcity/cockroach/nightlies/pebble_nightly_metamorphic_impl.sh b/build/teamcity/cockroach/nightlies/pebble_nightly_metamorphic_impl.sh index 934d1e03f9a5..9517095ba8f5 100755 --- a/build/teamcity/cockroach/nightlies/pebble_nightly_metamorphic_impl.sh +++ b/build/teamcity/cockroach/nightlies/pebble_nightly_metamorphic_impl.sh @@ -1,16 +1,14 @@ #!/usr/bin/env bash dir="$(dirname $(dirname $(dirname $(dirname "${0}"))))" -source "$dir/teamcity-bazel-support.sh" # For process_test_json set -euxo pipefail ARTIFACTS_DIR=/artifacts/meta mkdir -p $ARTIFACTS_DIR -GO_TEST_JSON_OUTPUT_FILE=/artifacts/test.json.txt echo "TC_SERVER_URL is $TC_SERVER_URL" -bazel build //pkg/cmd/bazci //pkg/cmd/github-post //pkg/cmd/testfilter --config=ci +bazel build //pkg/cmd/bazci --config=ci BAZEL_BIN=$(bazel info bazel-bin --config ci) @@ -18,20 +16,14 @@ exit_status=0 # NB: If adjusting the metamorphic test flags 
below, be sure to also update # pkg/cmd/github-post/main.go to ensure the GitHub issue poster includes the # correct flags in the reproduction command. -$BAZEL_BIN/pkg/cmd/bazci/bazci_/bazci -- test --config=ci \ +$BAZEL_BIN/pkg/cmd/bazci/bazci_/bazci --process_test_failures --formatter=pebble-metamorphic -- test --config=ci \ @com_github_cockroachdb_pebble//internal/metamorphic:metamorphic_test \ --test_timeout=25200 '--test_filter=TestMeta$' \ --define gotags=bazel,invariants \ - "--test_env=GO_TEST_JSON_OUTPUT_FILE=$GO_TEST_JSON_OUTPUT_FILE" \ - --run_under "@com_github_cockroachdb_stress//:stress -bazel -shardable-artifacts 'GO_TEST_JSON_OUTPUT_FILE=cat,XML_OUTPUT_FILE=$BAZEL_BIN/pkg/cmd/bazci/bazci_/bazci merge-test-xmls' -maxtime 6h -maxfails 1 -stderr -p 1" \ + --run_under "@com_github_cockroachdb_stress//:stress -bazel -shardable-artifacts 'XML_OUTPUT_FILE=$BAZEL_BIN/pkg/cmd/bazci/bazci_/bazci merge-test-xmls' -maxtime 6h -maxfails 1 -stderr -p 1" \ --test_arg -dir --test_arg $ARTIFACTS_DIR \ --test_arg -ops --test_arg "uniform:5000-10000" \ --test_output streamed \ || exit_status=$? 
-BAZEL_SUPPORT_EXTRA_GITHUB_POST_ARGS=--formatter=pebble-metamorphic process_test_json \ - $BAZEL_BIN/pkg/cmd/testfilter/testfilter_/testfilter \ - $BAZEL_BIN/pkg/cmd/github-post/github-post_/github-post \ - /artifacts $GO_TEST_JSON_OUTPUT_FILE $exit_status - exit $exit_status diff --git a/build/teamcity/cockroach/nightlies/pebble_nightly_metamorphic_race_impl.sh b/build/teamcity/cockroach/nightlies/pebble_nightly_metamorphic_race_impl.sh index faf888444e6d..0bb4b5bf8cf8 100755 --- a/build/teamcity/cockroach/nightlies/pebble_nightly_metamorphic_race_impl.sh +++ b/build/teamcity/cockroach/nightlies/pebble_nightly_metamorphic_race_impl.sh @@ -1,16 +1,14 @@ #!/usr/bin/env bash dir="$(dirname $(dirname $(dirname $(dirname "${0}"))))" -source "$dir/teamcity-bazel-support.sh" # For process_test_json set -euxo pipefail ARTIFACTS_DIR=/artifacts/meta mkdir -p $ARTIFACTS_DIR -GO_TEST_JSON_OUTPUT_FILE=/artifacts/test.json.txt echo "TC_SERVER_URL is $TC_SERVER_URL" -bazel build //pkg/cmd/bazci //pkg/cmd/github-post //pkg/cmd/testfilter --config=ci +bazel build //pkg/cmd/bazci --config=ci BAZEL_BIN=$(bazel info bazel-bin --config ci) @@ -18,20 +16,14 @@ exit_status=0 # NB: If adjusting the metamorphic test flags below, be sure to also update # pkg/cmd/github-post/main.go to ensure the GitHub issue poster includes the # correct flags in the reproduction command. 
-$BAZEL_BIN/pkg/cmd/bazci/bazci_/bazci -- test --config=race --config=ci \ +$BAZEL_BIN/pkg/cmd/bazci/bazci_/bazci --process_test_failures --formatter=pebble-metamorphic -- test --config=race --config=ci \ @com_github_cockroachdb_pebble//internal/metamorphic:metamorphic_test \ --test_timeout=14400 '--test_filter=TestMeta$' \ --define gotags=bazel,invariants \ - "--test_env=GO_TEST_JSON_OUTPUT_FILE=$GO_TEST_JSON_OUTPUT_FILE" \ - --run_under "@com_github_cockroachdb_stress//:stress -bazel -shardable-artifacts 'GO_TEST_JSON_OUTPUT_FILE=cat,XML_OUTPUT_FILE=$BAZEL_BIN/pkg/cmd/bazci/bazci_/bazci merge-test-xmls' -maxtime 3h -maxfails 1 -stderr -p 1" \ + --run_under "@com_github_cockroachdb_stress//:stress -bazel -shardable-artifacts 'XML_OUTPUT_FILE=$BAZEL_BIN/pkg/cmd/bazci/bazci_/bazci merge-test-xmls' -maxtime 3h -maxfails 1 -stderr -p 1" \ --test_arg -dir --test_arg $ARTIFACTS_DIR \ --test_arg -ops --test_arg "uniform:5000-10000" \ --test_output streamed \ || exit_status=$? -BAZEL_SUPPORT_EXTRA_GITHUB_POST_ARGS=--formatter=pebble-metamorphic process_test_json \ - $BAZEL_BIN/pkg/cmd/testfilter/testfilter_/testfilter \ - $BAZEL_BIN/pkg/cmd/github-post/github-post_/github-post \ - /artifacts $GO_TEST_JSON_OUTPUT_FILE $exit_status - exit $exit_status diff --git a/build/teamcity/cockroach/nightlies/random_syntax_tests_impl.sh b/build/teamcity/cockroach/nightlies/random_syntax_tests_impl.sh index 8f9b468f3922..b87841948323 100755 --- a/build/teamcity/cockroach/nightlies/random_syntax_tests_impl.sh +++ b/build/teamcity/cockroach/nightlies/random_syntax_tests_impl.sh @@ -3,22 +3,15 @@ set -xeuo pipefail dir="$(dirname $(dirname $(dirname $(dirname "${0}"))))" -source "$dir/teamcity-bazel-support.sh" -bazel build //pkg/cmd/bazci //pkg/cmd/github-post //pkg/cmd/testfilter --config=ci +bazel build //pkg/cmd/bazci --config=ci BAZEL_BIN=$(bazel info bazel-bin --config=ci) -GO_TEST_JSON_OUTPUT_FILE=/artifacts/test.json.txt exit_status=0 -$BAZEL_BIN/pkg/cmd/bazci/bazci_/bazci -- test 
--config=ci \ +$BAZEL_BIN/pkg/cmd/bazci/bazci_/bazci --process_test_failures -- test --config=ci \ //pkg/sql/tests:tests_test \ --test_arg -rsg=5m --test_arg -rsg-routines=8 --test_arg -rsg-exec-timeout=1m \ --test_timeout 3600 --test_filter 'TestRandomSyntax' \ --test_sharding_strategy=disabled \ - --test_env=GO_TEST_JSON_OUTPUT_FILE=$GO_TEST_JSON_OUTPUT_FILE || exit_status=$? -process_test_json \ - $BAZEL_BIN/pkg/cmd/testfilter/testfilter_/testfilter \ - $BAZEL_BIN/pkg/cmd/github-post/github-post_/github-post \ - /artifacts \ - $GO_TEST_JSON_OUTPUT_FILE \ - $exit_status + || exit_status=$? + exit $exit_status diff --git a/build/teamcity/cockroach/nightlies/sqlite_logic_test_impl.sh b/build/teamcity/cockroach/nightlies/sqlite_logic_test_impl.sh index 45b149032cbe..0654f198bd21 100755 --- a/build/teamcity/cockroach/nightlies/sqlite_logic_test_impl.sh +++ b/build/teamcity/cockroach/nightlies/sqlite_logic_test_impl.sh @@ -3,20 +3,13 @@ set -xeuo pipefail dir="$(dirname $(dirname $(dirname $(dirname "${0}"))))" -source "$dir/teamcity-bazel-support.sh" -bazel build //pkg/cmd/bazci //pkg/cmd/github-post //pkg/cmd/testfilter --config=ci +bazel build //pkg/cmd/bazci --config=ci BAZEL_BIN=$(bazel info bazel-bin --config=ci) -GO_TEST_JSON_OUTPUT_FILE=/artifacts/test.json.txt exit_status=0 -$BAZEL_BIN/pkg/cmd/bazci/bazci_/bazci -- test --config=ci --config=crdb_test_off \ +$BAZEL_BIN/pkg/cmd/bazci/bazci_/bazci --process_test_failures -- test --config=ci --config=crdb_test_off \ //pkg/sql/sqlitelogictest/tests/... \ --test_arg -bigtest --test_arg -flex-types --test_timeout 86400 \ - --test_env=GO_TEST_JSON_OUTPUT_FILE=$GO_TEST_JSON_OUTPUT_FILE || exit_status=$? -process_test_json \ - $BAZEL_BIN/pkg/cmd/testfilter/testfilter_/testfilter \ - $BAZEL_BIN/pkg/cmd/github-post/github-post_/github-post \ - /artifacts \ - $GO_TEST_JSON_OUTPUT_FILE \ - $exit_status + || exit_status=$? 
+ exit $exit_status diff --git a/build/teamcity/cockroach/nightlies/sqllogic_corpus_nightly_impl.sh b/build/teamcity/cockroach/nightlies/sqllogic_corpus_nightly_impl.sh index 90c4412d8783..fde3f7dc8b49 100755 --- a/build/teamcity/cockroach/nightlies/sqllogic_corpus_nightly_impl.sh +++ b/build/teamcity/cockroach/nightlies/sqllogic_corpus_nightly_impl.sh @@ -3,22 +3,15 @@ set -xeuo pipefail dir="$(dirname $(dirname $(dirname $(dirname "${0}"))))" -source "$dir/teamcity-bazel-support.sh" # For process_test_json -source "$dir/teamcity-support.sh" # For process_test_json +source "$dir/teamcity-support.sh" -bazel build //pkg/cmd/bazci //pkg/cmd/github-post //pkg/cmd/testfilter --config=ci +bazel build //pkg/cmd/bazci --config=ci BAZEL_BIN=$(bazel info bazel-bin --config=ci) google_credentials="$GOOGLE_EPHEMERAL_CREDENTIALS" log_into_gcloud ARTIFACTS_DIR=/artifacts -GO_TEST_JSON_OUTPUT_FILE=$ARTIFACTS_DIR/test.json.txt -GO_TEST_GEN_JSON_OUTPUT_FILE=$ARTIFACTS_DIR/test-gen.json.txt -GO_TEST_GEN_CCL_JSON_OUTPUT_FILE=$ARTIFACTS_DIR/test-gen-ccl.json.txt -GO_TEST_VALIDATE_JSON_OUTPUT_FILE=$ARTIFACTS_DIR/test-validate.json.txt -GO_TEST_JSON_OUTPUT_FILE_MIXED=$ARTIFACTS_DIR/test-mixed.json.txt -GO_TEST_VALIDATE_JSON_OUTPUT_FILE_MIXED=$ARTIFACTS_DIR/test-validate-mixed.json.txt mkdir -p $ARTIFACTS_DIR/corpus mkdir -p $ARTIFACTS_DIR/corpus-mixed @@ -26,97 +19,56 @@ exit_status=0 # Generate a corpus for all non-mixed version variants for config in local multiregion-9node-3region-3azs; do -$BAZEL_BIN/pkg/cmd/bazci/bazci_/bazci test -- --config=ci \ +$BAZEL_BIN/pkg/cmd/bazci/bazci_/bazci --process_test_failures test -- --config=ci \ //pkg/sql/logictest/tests/$config/... \ --test_arg=--declarative-corpus=$ARTIFACTS_DIR/corpus \ --test_env=GO_TEST_WRAP_TESTV=1 \ --test_env=GO_TEST_WRAP=1 \ - --test_env=GO_TEST_JSON_OUTPUT_FILE=$GO_TEST_JSON_OUTPUT_FILE.$config \ --test_timeout=7200 \ || exit_status=$? 
- -process_test_json \ - $BAZEL_BIN/pkg/cmd/testfilter/testfilter_/testfilter \ - $BAZEL_BIN/pkg/cmd/github-post/github-post_/github-post \ - $ARTIFACTS_DIR \ - $GO_TEST_JSON_OUTPUT_FILE.$config \ - $exit_status done for config in local multiregion-9node-3region-3azs multiregion-9node-3region-3azs-no-los multiregion-9node-3region-3azs-tenant multiregion-9node-3region-3azs-vec-off multiregion-15node-5region-3azs 3node-tenant 3node-tenant-multiregion; do -$BAZEL_BIN/pkg/cmd/bazci/bazci_/bazci test -- --config=ci \ +$BAZEL_BIN/pkg/cmd/bazci/bazci_/bazci --process_test_failures test -- --config=ci \ //pkg/ccl/logictestccl/tests/$config/... \ --test_arg=--declarative-corpus=$ARTIFACTS_DIR/corpus \ --test_env=GO_TEST_WRAP_TESTV=1 \ --test_env=GO_TEST_WRAP=1 \ - --test_env=GO_TEST_JSON_OUTPUT_FILE=$GO_TEST_JSON_OUTPUT_FILE.$config \ --test_timeout=7200 \ || exit_status=$? - -process_test_json \ - $BAZEL_BIN/pkg/cmd/testfilter/testfilter_/testfilter \ - $BAZEL_BIN/pkg/cmd/github-post/github-post_/github-post \ - $ARTIFACTS_DIR \ - $GO_TEST_JSON_OUTPUT_FILE.$config \ - $exit_status done # Generate corpuses from end-to-end-schema changer tests -$BAZEL_BIN/pkg/cmd/bazci/bazci_/bazci test -- --config=ci \ +$BAZEL_BIN/pkg/cmd/bazci/bazci_/bazci --process_test_failures test -- --config=ci \ //pkg/sql/schemachanger:schemachanger_test \ --test_arg=--declarative-corpus=$ARTIFACTS_DIR/corpus \ --test_filter='^TestGenerateCorpus.*$' \ --test_env=GO_TEST_WRAP_TESTV=1 \ --test_env=GO_TEST_WRAP=1 \ - --test_env=GO_TEST_JSON_OUTPUT_FILE=$GO_TEST_GEN_JSON_OUTPUT_FILE \ --test_timeout=7200 \ || exit_status=$? 
-process_test_json \ -$BAZEL_BIN/pkg/cmd/testfilter/testfilter_/testfilter \ -$BAZEL_BIN/pkg/cmd/github-post/github-post_/github-post \ -$ARTIFACTS_DIR \ -$GO_TEST_GEN_JSON_OUTPUT_FILE \ -$exit_status - # Generate corpuses from end-to-end-schema changer tests -$BAZEL_BIN/pkg/cmd/bazci/bazci_/bazci test -- --config=ci \ +$BAZEL_BIN/pkg/cmd/bazci/bazci_/bazci --process_test_failures test -- --config=ci \ //pkg/ccl/schemachangerccl:schemachangerccl_test \ --test_arg=--declarative-corpus=$ARTIFACTS_DIR/corpus \ --test_filter='^TestGenerateCorpus.*$' \ --test_env=GO_TEST_WRAP_TESTV=1 \ --test_env=GO_TEST_WRAP=1 \ - --test_env=GO_TEST_JSON_OUTPUT_FILE=$GO_TEST_GEN_CCL_JSON_OUTPUT_FILE \ --test_timeout=7200 \ || exit_status=$? -process_test_json \ -$BAZEL_BIN/pkg/cmd/testfilter/testfilter_/testfilter \ -$BAZEL_BIN/pkg/cmd/github-post/github-post_/github-post \ -$ARTIFACTS_DIR \ -$GO_TEST_GEN_CCL_JSON_OUTPUT_FILE \ -$exit_status - - # Any generated corpus should be validated on the current version first, which # indicates we can replay it on the same version. -$BAZEL_BIN/pkg/cmd/bazci/bazci_/bazci test -- --config=ci \ +$BAZEL_BIN/pkg/cmd/bazci/bazci_/bazci --process_test_failures test -- --config=ci \ //pkg/sql/schemachanger/corpus:corpus_test \ --test_arg=--declarative-corpus=$ARTIFACTS_DIR/corpus \ --test_filter='^TestValidateCorpuses$' \ --test_env=GO_TEST_WRAP_TESTV=1 \ --test_env=GO_TEST_WRAP=1 \ - --test_env=GO_TEST_JSON_OUTPUT_FILE=$GO_TEST_VALIDATE_JSON_OUTPUT_FILE \ --test_timeout=7200 \ || exit_status=$? -process_test_json \ -$BAZEL_BIN/pkg/cmd/testfilter/testfilter_/testfilter \ -$BAZEL_BIN/pkg/cmd/github-post/github-post_/github-post \ -$ARTIFACTS_DIR \ -$GO_TEST_VALIDATE_JSON_OUTPUT_FILE \ -$exit_status - # If validation passes its safe to update the copy in storage. 
if [ $exit_status = 0 ]; then gsutil cp $ARTIFACTS_DIR/corpus/* gs://cockroach-corpus/corpus-$TC_BUILD_BRANCH/ @@ -124,42 +76,26 @@ fi # Generate a corpus for all mixed version variants for config in local-mixed-22.1-22.2; do -$BAZEL_BIN/pkg/cmd/bazci/bazci_/bazci test -- --config=ci \ +$BAZEL_BIN/pkg/cmd/bazci/bazci_/bazci --process_test_failures test -- --config=ci \ //pkg/sql/logictest/tests/$config/... \ --test_arg=--declarative-corpus=$ARTIFACTS_DIR/corpus-mixed\ --test_env=GO_TEST_WRAP_TESTV=1 \ --test_env=GO_TEST_WRAP=1 \ - --test_env=GO_TEST_JSON_OUTPUT_FILE=$GO_TEST_JSON_OUTPUT_FILE_MIXED.$config \ --test_timeout=7200 \ || exit_status=$? -process_test_json \ - $BAZEL_BIN/pkg/cmd/testfilter/testfilter_/testfilter \ - $BAZEL_BIN/pkg/cmd/github-post/github-post_/github-post \ - $ARTIFACTS_DIR \ - $GO_TEST_JSON_OUTPUT_FILE_MIXED.$config \ - $exit_status done - # Any generated corpus should be validated on the current version first, which # indicates we can replay it on the same version. -$BAZEL_BIN/pkg/cmd/bazci/bazci_/bazci test -- --config=ci \ +$BAZEL_BIN/pkg/cmd/bazci/bazci_/bazci --process_test_failures test -- --config=ci \ //pkg/sql/schemachanger/corpus:corpus_test \ --test_arg=--declarative-corpus=$ARTIFACTS_DIR/corpus-mixed \ --test_filter='^TestValidateCorpuses$' \ --test_env=GO_TEST_WRAP_TESTV=1 \ --test_env=GO_TEST_WRAP=1 \ - --test_env=GO_TEST_JSON_OUTPUT_FILE=$GO_TEST_VALIDATE_JSON_OUTPUT_FILE_MIXED \ --test_timeout=7200 \ || exit_status=$? -process_test_json \ - $BAZEL_BIN/pkg/cmd/testfilter/testfilter_/testfilter \ - $BAZEL_BIN/pkg/cmd/github-post/github-post_/github-post \ - $ARTIFACTS_DIR \ - $GO_TEST_VALIDATE_JSON_OUTPUT_FILE_MIXED \ - $exit_status - # If validation passes its safe to update the copy in storage. 
if [ $exit_status = 0 ]; then gsutil cp $ARTIFACTS_DIR/corpus-mixed/* gs://cockroach-corpus/corpus-mixed-$TC_BUILD_BRANCH/ diff --git a/build/teamcity/cockroach/nightlies/sqllogic_hi_vmodule_nightly_impl.sh b/build/teamcity/cockroach/nightlies/sqllogic_hi_vmodule_nightly_impl.sh index 97b71fa3bfdb..62f80b7ea99f 100755 --- a/build/teamcity/cockroach/nightlies/sqllogic_hi_vmodule_nightly_impl.sh +++ b/build/teamcity/cockroach/nightlies/sqllogic_hi_vmodule_nightly_impl.sh @@ -3,30 +3,20 @@ set -xeuo pipefail dir="$(dirname $(dirname $(dirname $(dirname "${0}"))))" -source "$dir/teamcity-bazel-support.sh" # For process_test_json -bazel build //pkg/cmd/bazci //pkg/cmd/github-post //pkg/cmd/testfilter --config=ci +bazel build //pkg/cmd/bazci --config=ci BAZEL_BIN=$(bazel info bazel-bin --config=ci) ARTIFACTS_DIR=/artifacts -GO_TEST_JSON_OUTPUT_FILE=$ARTIFACTS_DIR/test.json.txt exit_status=0 -$BAZEL_BIN/pkg/cmd/bazci/bazci_/bazci -- test --config=ci \ +$BAZEL_BIN/pkg/cmd/bazci/bazci_/bazci --process_test_failures -- test --config=ci \ //pkg/sql/logictest/tests/... \ --test_arg=--vmodule=*=10 \ --test_arg=-show-sql \ --test_env=GO_TEST_WRAP_TESTV=1 \ --test_env=GO_TEST_WRAP=1 \ - --test_env=GO_TEST_JSON_OUTPUT_FILE=$GO_TEST_JSON_OUTPUT_FILE \ --test_timeout=7200 \ || exit_status=$? 
-process_test_json \ - $BAZEL_BIN/pkg/cmd/testfilter/testfilter_/testfilter \ - $BAZEL_BIN/pkg/cmd/github-post/github-post_/github-post \ - $ARTIFACTS_DIR \ - $GO_TEST_JSON_OUTPUT_FILE \ - $exit_status - exit $exit_status diff --git a/build/teamcity/cockroach/nightlies/stress_impl.sh b/build/teamcity/cockroach/nightlies/stress_impl.sh index f52d9d16a212..b48564b3675e 100755 --- a/build/teamcity/cockroach/nightlies/stress_impl.sh +++ b/build/teamcity/cockroach/nightlies/stress_impl.sh @@ -3,7 +3,6 @@ set -xeuo pipefail dir="$(dirname $(dirname $(dirname $(dirname "${0}"))))" -source "$dir/teamcity-bazel-support.sh" # For process_test_json if [ -z "${TAGS-}" ] then @@ -12,7 +11,7 @@ else TAGS="bazel,gss,$TAGS" fi -bazel build //pkg/cmd/bazci //pkg/cmd/github-post //pkg/cmd/testfilter --config=ci +bazel build //pkg/cmd/bazci --config=ci BAZEL_BIN=$(bazel info bazel-bin --config=ci) ARTIFACTS_DIR=/artifacts @@ -33,23 +32,15 @@ do continue fi exit_status=0 - GO_TEST_JSON_OUTPUT_FILE=$ARTIFACTS_DIR/$(echo "$test" | cut -d: -f2).test.json.txt - $BAZEL_BIN/pkg/cmd/bazci/bazci_/bazci -- --config=ci test "$test" \ + $BAZEL_BIN/pkg/cmd/bazci/bazci_/bazci --process_test_failures -- --config=ci test "$test" \ --test_env=COCKROACH_NIGHTLY_STRESS=true \ - --test_env=GO_TEST_JSON_OUTPUT_FILE=$GO_TEST_JSON_OUTPUT_FILE \ --test_timeout="$TESTTIMEOUTSECS" \ - --run_under "@com_github_cockroachdb_stress//:stress -bazel -shardable-artifacts 'GO_TEST_JSON_OUTPUT_FILE=cat,XML_OUTPUT_FILE=$BAZEL_BIN/pkg/cmd/bazci/bazci_/bazci merge-test-xmls' $STRESSFLAGS" \ + --run_under "@com_github_cockroachdb_stress//:stress -bazel -shardable-artifacts 'XML_OUTPUT_FILE=$BAZEL_BIN/pkg/cmd/bazci/bazci_/bazci merge-test-xmls' $STRESSFLAGS" \ --define "gotags=$TAGS" \ --nocache_test_results \ --test_output streamed \ ${EXTRA_BAZEL_FLAGS} \ || exit_status=$? 
- process_test_json \ - $BAZEL_BIN/pkg/cmd/testfilter/testfilter_/testfilter \ - $BAZEL_BIN/pkg/cmd/github-post/github-post_/github-post \ - $ARTIFACTS_DIR \ - $GO_TEST_JSON_OUTPUT_FILE \ - $exit_status if [ $exit_status -ne 0 ] then exit $exit_status diff --git a/pkg/BUILD.bazel b/pkg/BUILD.bazel index 88ce549a7914..0c84eda1b93f 100644 --- a/pkg/BUILD.bazel +++ b/pkg/BUILD.bazel @@ -112,6 +112,8 @@ ALL_TESTS = [ "//pkg/cloud/userfile/filetable/filetabletest:filetabletest_test", "//pkg/cloud/userfile:userfile_test", "//pkg/clusterversion:clusterversion_test", + "//pkg/cmd/bazci/githubpost:githubpost_test", + "//pkg/cmd/bazci/testfilter:testfilter_test", "//pkg/cmd/cmpconn:cmpconn_test", "//pkg/cmd/cockroach-oss:cockroach-oss_disallowed_imports_test", "//pkg/cmd/dev:dev_lib_disallowed_imports_test", @@ -119,7 +121,6 @@ ALL_TESTS = [ "//pkg/cmd/docgen/extract:extract_test", "//pkg/cmd/docs-issue-generation:docs-issue-generation_test", "//pkg/cmd/generate-bazel-extra:generate-bazel-extra_test", - "//pkg/cmd/github-post:github-post_test", "//pkg/cmd/github-pull-request-make:github-pull-request-make_test", "//pkg/cmd/internal/issues:issues_test", "//pkg/cmd/label-merged-pr:label-merged-pr_test", @@ -135,7 +136,6 @@ ALL_TESTS = [ "//pkg/cmd/roachtest/tests:tests_test", "//pkg/cmd/roachtest:roachtest_test", "//pkg/cmd/teamcity-trigger:teamcity-trigger_test", - "//pkg/cmd/testfilter:testfilter_test", "//pkg/col/coldata:coldata_disallowed_imports_test", "//pkg/col/coldata:coldata_test", "//pkg/col/coldataext:coldataext_test", @@ -585,6 +585,7 @@ ALL_TESTS = [ "//pkg/util/quantile:quantile_test", "//pkg/util/quotapool:quotapool_test", "//pkg/util/randutil:randutil_test", + "//pkg/util/rangedesciter:rangedesciter_test", "//pkg/util/retry:retry_test", "//pkg/util/ring:ring_test", "//pkg/util/schedulerlatency:schedulerlatency_test", @@ -862,6 +863,10 @@ GO_TARGETS = [ "//pkg/clusterversion:clusterversion_test", "//pkg/cmd/allocsim:allocsim", "//pkg/cmd/allocsim:allocsim_lib", + 
"//pkg/cmd/bazci/githubpost:githubpost", + "//pkg/cmd/bazci/githubpost:githubpost_test", + "//pkg/cmd/bazci/testfilter:testfilter", + "//pkg/cmd/bazci/testfilter:testfilter_test", "//pkg/cmd/bazci:bazci", "//pkg/cmd/bazci:bazci_lib", "//pkg/cmd/cmdutil:cmdutil", @@ -917,7 +922,6 @@ GO_TARGETS = [ "//pkg/cmd/geoviz:geoviz_lib", "//pkg/cmd/github-post:github-post", "//pkg/cmd/github-post:github-post_lib", - "//pkg/cmd/github-post:github-post_test", "//pkg/cmd/github-pull-request-make:github-pull-request-make", "//pkg/cmd/github-pull-request-make:github-pull-request-make_lib", "//pkg/cmd/github-pull-request-make:github-pull-request-make_test", @@ -993,7 +997,6 @@ GO_TARGETS = [ "//pkg/cmd/teamcity-trigger:teamcity-trigger_test", "//pkg/cmd/testfilter:testfilter", "//pkg/cmd/testfilter:testfilter_lib", - "//pkg/cmd/testfilter:testfilter_test", "//pkg/cmd/uptodate:uptodate", "//pkg/cmd/uptodate:uptodate_lib", "//pkg/cmd/urlcheck/lib/urlcheck:urlcheck", @@ -2018,6 +2021,8 @@ GO_TARGETS = [ "//pkg/util/quotapool:quotapool_test", "//pkg/util/randutil:randutil", "//pkg/util/randutil:randutil_test", + "//pkg/util/rangedesciter:rangedesciter", + "//pkg/util/rangedesciter:rangedesciter_test", "//pkg/util/retry:retry", "//pkg/util/retry:retry_test", "//pkg/util/ring:ring", @@ -2300,6 +2305,8 @@ GET_X_DATA_TARGETS = [ "//pkg/clusterversion:get_x_data", "//pkg/cmd/allocsim:get_x_data", "//pkg/cmd/bazci:get_x_data", + "//pkg/cmd/bazci/githubpost:get_x_data", + "//pkg/cmd/bazci/testfilter:get_x_data", "//pkg/cmd/cmdutil:get_x_data", "//pkg/cmd/cmp-protocol:get_x_data", "//pkg/cmd/cmp-protocol/pgconnect:get_x_data", @@ -2991,6 +2998,7 @@ GET_X_DATA_TARGETS = [ "//pkg/util/quantile:get_x_data", "//pkg/util/quotapool:get_x_data", "//pkg/util/randutil:get_x_data", + "//pkg/util/rangedesciter:get_x_data", "//pkg/util/retry:get_x_data", "//pkg/util/ring:get_x_data", "//pkg/util/schedulerlatency:get_x_data", diff --git a/pkg/ccl/cliccl/BUILD.bazel b/pkg/ccl/cliccl/BUILD.bazel index 
5a88f2e73fdb..473033a01e40 100644 --- a/pkg/ccl/cliccl/BUILD.bazel +++ b/pkg/ccl/cliccl/BUILD.bazel @@ -30,6 +30,7 @@ go_library( "//pkg/util/timeutil", "@com_github_cockroachdb_errors//:errors", "@com_github_cockroachdb_errors//oserror", + "@com_github_cockroachdb_pebble//vfs", "@com_github_spf13_cobra//:cobra", ], ) @@ -52,8 +53,10 @@ go_test( "//pkg/server", "//pkg/storage", "//pkg/testutils/serverutils", + "//pkg/util/envutil", "//pkg/util/leaktest", "//pkg/util/log", + "//pkg/util/randutil", "@com_github_spf13_cobra//:cobra", "@com_github_stretchr_testify//require", ], diff --git a/pkg/ccl/cliccl/debug.go b/pkg/ccl/cliccl/debug.go index dd346aa8f8b1..a30de500cec6 100644 --- a/pkg/ccl/cliccl/debug.go +++ b/pkg/ccl/cliccl/debug.go @@ -92,11 +92,22 @@ stdout. RunE: clierrorplus.MaybeDecorateError(runDecrypt), } + encryptionRegistryList := &cobra.Command{ + Use: "encryption-registry-list ", + Short: "list files in the encryption-at-rest file registry", + Long: `Prints a list of files in an Encryption At Rest file registry, along +with their env type and encryption settings (if applicable). +`, + Args: cobra.MinimumNArgs(1), + RunE: clierrorplus.MaybeDecorateError(runList), + } + // Add commands to the root debug command. // We can't add them to the lists of commands (eg: DebugCmdsForPebble) as cli init() is called before us. cli.DebugCmd.AddCommand(encryptionStatusCmd) cli.DebugCmd.AddCommand(encryptionActiveKeyCmd) cli.DebugCmd.AddCommand(encryptionDecryptCmd) + cli.DebugCmd.AddCommand(encryptionRegistryList) // Add the encryption flag to commands that need it. // For the encryption-status command. @@ -108,6 +119,9 @@ stdout. // For the encryption-decrypt command. f = encryptionDecryptCmd.Flags() cliflagcfg.VarFlag(f, &storeEncryptionSpecs, cliflagsccl.EnterpriseEncryption) + // For the encryption-registry-list command. 
+ f = encryptionRegistryList.Flags() + cliflagcfg.VarFlag(f, &storeEncryptionSpecs, cliflagsccl.EnterpriseEncryption) // Add encryption flag to all OSS debug commands that want it. for _, cmd := range cli.DebugCommandsRequiringEncryption { diff --git a/pkg/ccl/cliccl/ear.go b/pkg/ccl/cliccl/ear.go index c00dc068ee6c..722a902e5672 100644 --- a/pkg/ccl/cliccl/ear.go +++ b/pkg/ccl/cliccl/ear.go @@ -9,14 +9,21 @@ package cliccl import ( + "bytes" "context" + "fmt" "io" "os" + "sort" + "github.com/cockroachdb/cockroach/pkg/ccl/storageccl/engineccl/enginepbccl" "github.com/cockroachdb/cockroach/pkg/cli" "github.com/cockroachdb/cockroach/pkg/storage" + "github.com/cockroachdb/cockroach/pkg/storage/enginepb" + "github.com/cockroachdb/cockroach/pkg/util/protoutil" "github.com/cockroachdb/cockroach/pkg/util/stop" "github.com/cockroachdb/errors" + "github.com/cockroachdb/pebble/vfs" "github.com/spf13/cobra" ) @@ -57,3 +64,61 @@ func runDecrypt(_ *cobra.Command, args []string) (returnErr error) { return nil } + +type fileEntry struct { + name string + envType enginepb.EnvType + settings enginepbccl.EncryptionSettings +} + +func (f fileEntry) String() string { + var b bytes.Buffer + _, _ = fmt.Fprintf( + &b, "%s:\n env type: %s, %s\n", + f.name, f.envType, f.settings.EncryptionType, + ) + if f.settings.EncryptionType != enginepbccl.EncryptionType_Plaintext { + _, _ = fmt.Fprintf( + &b, " keyID: %s\n nonce: % x\n counter: %d", + f.settings.KeyId, f.settings.Nonce, f.settings.Counter, + ) + } + return b.String() +} + +func runList(cmd *cobra.Command, args []string) error { + dir := args[0] + + fr := &storage.PebbleFileRegistry{ + FS: vfs.Default, + DBDir: dir, + ReadOnly: true, + } + if err := fr.Load(); err != nil { + return errors.Wrapf(err, "could not load file registry") + } + defer func() { _ = fr.Close() }() + + // List files and print to stdout. 
+ var entries []fileEntry + for name, entry := range fr.List() { + var encSettings enginepbccl.EncryptionSettings + settings := entry.EncryptionSettings + if err := protoutil.Unmarshal(settings, &encSettings); err != nil { + return errors.Wrapf(err, "could not unmarshal encryption settings for %s", name) + } + entries = append(entries, fileEntry{ + name: name, + envType: entry.EnvType, + settings: encSettings, + }) + } + sort.Slice(entries, func(i, j int) bool { + return entries[i].name < entries[j].name + }) + for _, e := range entries { + _, _ = fmt.Fprintf(cmd.OutOrStdout(), "%s\n", e) + } + + return nil +} diff --git a/pkg/ccl/cliccl/ear_test.go b/pkg/ccl/cliccl/ear_test.go index 1fb77826e431..64353c04a5e9 100644 --- a/pkg/ccl/cliccl/ear_test.go +++ b/pkg/ccl/cliccl/ear_test.go @@ -11,6 +11,7 @@ package cliccl import ( "bytes" "context" + "crypto/rand" "fmt" "path/filepath" "strings" @@ -22,8 +23,10 @@ import ( _ "github.com/cockroachdb/cockroach/pkg/ccl/storageccl/engineccl" "github.com/cockroachdb/cockroach/pkg/cli" "github.com/cockroachdb/cockroach/pkg/storage" + "github.com/cockroachdb/cockroach/pkg/util/envutil" "github.com/cockroachdb/cockroach/pkg/util/leaktest" "github.com/cockroachdb/cockroach/pkg/util/log" + "github.com/cockroachdb/cockroach/pkg/util/randutil" "github.com/spf13/cobra" "github.com/stretchr/testify/require" ) @@ -97,6 +100,97 @@ func TestDecrypt(t *testing.T) { require.Contains(t, out, checkStr) } +func TestList(t *testing.T) { + defer leaktest.AfterTest(t)() + defer log.Scope(t).Close(t) + + // Pin the random generator to use a fixed seed. This also requires overriding + // the PRNG for the duration of the test. This ensures that all the keys + // generated by the key manager are deterministic. 
+ reset := envutil.TestSetEnv(t, "COCKROACH_RANDOM_SEED", "1665612120123601000") + defer reset() + randBefore := rand.Reader + randOverride, _ := randutil.NewPseudoRand() + rand.Reader = randOverride + defer func() { rand.Reader = randBefore }() + + ctx := context.Background() + dir := t.TempDir() + + // Generate a new encryption key to use. + keyPath := filepath.Join(dir, "aes.key") + err := cli.GenEncryptionKeyCmd.RunE(nil, []string{keyPath}) + require.NoError(t, err) + + // Spin up a new encrypted store. + encSpecStr := fmt.Sprintf("path=%s,key=%s,old-key=plain", dir, keyPath) + encSpec, err := baseccl.NewStoreEncryptionSpec(encSpecStr) + require.NoError(t, err) + encOpts, err := encSpec.ToEncryptionOptions() + require.NoError(t, err) + p, err := storage.Open(ctx, storage.Filesystem(dir), storage.EncryptionAtRest(encOpts)) + require.NoError(t, err) + + // Write a key and flush, to create a table in the store. + err = p.PutUnversioned([]byte("foo"), nil) + require.NoError(t, err) + err = p.Flush() + require.NoError(t, err) + p.Close() + + // List the files in the registry. 
+ cmd := getTool(cli.DebugCmd, []string{"debug", "encryption-registry-list"}) + require.NotNil(t, cmd) + var b bytes.Buffer + cmd.SetOut(&b) + cmd.SetErr(&b) + err = runList(cmd, []string{dir}) + require.NoError(t, err) + + const want = `000002.log: + env type: Data, AES128_CTR + keyID: bbb65a9d114c2a18740f27b6933b74f61018bd5adf545c153b48ffe6473336ef + nonce: 06 c2 26 f9 68 f0 fc ff b9 e7 82 8f + counter: 914487965 +000004.log: + env type: Data, AES128_CTR + keyID: bbb65a9d114c2a18740f27b6933b74f61018bd5adf545c153b48ffe6473336ef + nonce: 80 18 c0 79 61 c7 cf ef b4 25 4e 78 + counter: 1483615076 +000005.sst: + env type: Data, AES128_CTR + keyID: bbb65a9d114c2a18740f27b6933b74f61018bd5adf545c153b48ffe6473336ef + nonce: 71 12 f7 22 9a fb 90 24 4e 58 27 01 + counter: 3082989236 +COCKROACHDB_DATA_KEYS_000001_monolith: + env type: Store, AES128_CTR + keyID: f594229216d81add7811c4360212eb7629b578ef4eab6e5d05679b3c5de48867 + nonce: 8f 4c ba 1a a3 4f db 3c db 84 cf f5 + counter: 2436226951 +CURRENT: + env type: Data, AES128_CTR + keyID: bbb65a9d114c2a18740f27b6933b74f61018bd5adf545c153b48ffe6473336ef + nonce: 18 c2 a6 23 cc 6e 2e 7c 8e bf 84 77 + counter: 3159373900 +MANIFEST-000001: + env type: Data, AES128_CTR + keyID: bbb65a9d114c2a18740f27b6933b74f61018bd5adf545c153b48ffe6473336ef + nonce: 2e fd 49 2f 5f c5 53 0a e8 8f 78 cc + counter: 110434741 +OPTIONS-000003: + env type: Data, AES128_CTR + keyID: bbb65a9d114c2a18740f27b6933b74f61018bd5adf545c153b48ffe6473336ef + nonce: d3 97 11 b3 1a ed 22 2b 74 fb 02 0c + counter: 1229228536 +marker.datakeys.000001.COCKROACHDB_DATA_KEYS_000001_monolith: + env type: Store, AES128_CTR + keyID: f594229216d81add7811c4360212eb7629b578ef4eab6e5d05679b3c5de48867 + nonce: 55 d7 d4 27 6c 97 9b dd f1 5d 40 c8 + counter: 467030050 +` + require.Equal(t, want, b.String()) +} + // getTool traverses the given cobra.Command recursively, searching for a tool // matching the given command. 
func getTool(cmd *cobra.Command, want []string) *cobra.Command { diff --git a/pkg/cmd/bazci/BUILD.bazel b/pkg/cmd/bazci/BUILD.bazel index 5c7a0525de31..1fa1f82ea583 100644 --- a/pkg/cmd/bazci/BUILD.bazel +++ b/pkg/cmd/bazci/BUILD.bazel @@ -12,6 +12,8 @@ go_library( deps = [ "//pkg/build/bazel/bes", "//pkg/build/util", + "//pkg/cmd/bazci/githubpost", + "//pkg/cmd/bazci/testfilter", "@com_github_alessio_shellescape//:shellescape", "@com_github_cockroachdb_errors//:errors", "@com_github_gogo_protobuf//proto", diff --git a/pkg/cmd/bazci/bazci.go b/pkg/cmd/bazci/bazci.go index a2cc670d628f..978c5f02f9e2 100644 --- a/pkg/cmd/bazci/bazci.go +++ b/pkg/cmd/bazci/bazci.go @@ -26,6 +26,8 @@ import ( "github.com/alessio/shellescape" bes "github.com/cockroachdb/cockroach/pkg/build/bazel/bes" bazelutil "github.com/cockroachdb/cockroach/pkg/build/util" + "github.com/cockroachdb/cockroach/pkg/cmd/bazci/githubpost" + "github.com/cockroachdb/cockroach/pkg/cmd/bazci/testfilter" "github.com/cockroachdb/errors" "github.com/gogo/protobuf/proto" "github.com/spf13/cobra" @@ -49,7 +51,9 @@ type fullTestResult struct { } var ( - artifactsDir string + artifactsDir string + githubPostFormatterName string + shouldProcessTestFailures bool rootCmd = &cobra.Command{ Use: "bazci", @@ -67,40 +71,77 @@ func init() { &artifactsDir, "artifacts_dir", "/artifacts", - "path where artifacts should be staged") + "path where artifacts should be staged", + ) + rootCmd.Flags().StringVar( + &githubPostFormatterName, + "formatter", + "default", + "formatter name for githubpost", + ) + rootCmd.Flags().BoolVar( + &shouldProcessTestFailures, + "process_test_failures", + false, + "process failures artifacts (and post github issues for release failures)", + ) } -func bazciImpl(cmd *cobra.Command, args []string) error { +func bazciImpl(cmd *cobra.Command, args []string) (retErr error) { + var goTestJSONOutputFilePath string + defer func() { + if err := processTestJSONIfNeeded(retErr != nil /* shouldCreateTarball */, 
goTestJSONOutputFilePath); err != nil { + fmt.Printf("failed to process go test json output - %v\n", err) + } + }() + if args[0] != buildSubcmd && args[0] != runSubcmd && args[0] != testSubcmd && args[0] != mungeTestXMLSubcmd && args[0] != mergeTestXMLsSubcmd { - return errors.Newf("First argument must be `build`, `run`, `test`, `merge-test-xmls`, or `munge-test-xml`; got %v", args[0]) + retErr = errors.Newf("First argument must be `build`, `run`, `test`, `merge-test-xmls`, or `munge-test-xml`; got %v", args[0]) + return } // Special case: munge-test-xml/merge-test-xmls don't require running Bazel at all. // Perform the munge then exit immediately. if args[0] == mungeTestXMLSubcmd { - return mungeTestXMLs(args) + retErr = mungeTestXMLs(args) + return } if args[0] == mergeTestXMLsSubcmd { - return mergeTestXMLs(args) + retErr = mergeTestXMLs(args) + return } - tmpDir, err := os.MkdirTemp("", "") - if err != nil { - return err + tmpDir, retErr := os.MkdirTemp("", "") + if retErr != nil { + return } bepLoc := filepath.Join(tmpDir, "beplog") args = append(args, fmt.Sprintf("--build_event_binary_file=%s", bepLoc)) + if shouldProcessTestFailures { + f, createTempErr := os.CreateTemp(artifactsDir, "test.json.txt") + if createTempErr != nil { + retErr = createTempErr + return + } + goTestJSONOutputFilePath = f.Name() + // Closing the file because we will not use the file pointer. + if retErr = f.Close(); retErr != nil { + return + } + args = append(args, "--test_env", goTestJSONOutputFilePath) + } + fmt.Println("running bazel w/ args: ", shellescape.QuoteCommand(args)) bazelCmd := exec.Command("bazel", args...) 
bazelCmd.Stdout = os.Stdout bazelCmd.Stderr = os.Stderr - err = bazelCmd.Run() - if err != nil { - fmt.Printf("got error %+v from bazel run\n", err) + bazelCmdErr := bazelCmd.Run() + if bazelCmdErr != nil { + fmt.Printf("got error %+v from bazel run\n", bazelCmdErr) fmt.Println("WARNING: the beplog file may not have been created") } - - return processBuildEventProtocolLog(args[0], bepLoc) + retErr = processBuildEventProtocolLog(args[0], bepLoc) + return } func mungeTestXMLs(args []string) error { @@ -268,3 +309,129 @@ func doCopy(src, dst string) error { } return err } + +func processFailures(goTestJSONOutputFileBuf []byte, failuresFilePath string) error { + pr, pw := io.Pipe() + err := testfilter.FilterAndWrite(bytes.NewReader(goTestJSONOutputFileBuf), pw, []string{"strip", "omit", "convert"}) + if err != nil { + return err + } + f, err := os.Create(failuresFilePath) + if err != nil { + return err + } + written, err := io.Copy(f, pr) + if err != nil { + return err + } + if err := f.Close(); err != nil { + return err + } + if written == 0 { + if err := os.Remove(failuresFilePath); err != nil { + return err + } + } + return nil +} + +func postReleaseOnlyFailures(goTestJSONOutputFileBuf []byte) error { + branch := strings.TrimPrefix(os.Getenv("TC_BUILD_BRANCH"), "refs/heads/") + isReleaseBranch := strings.HasPrefix(branch, "master") || strings.HasPrefix(branch, "release") || strings.HasPrefix(branch, "provisional") + if isReleaseBranch { + // GITHUB_API_TOKEN must be in the env or github-post will barf if it's + // ever asked to post, so enforce that on all runs. + // The way this env var is made available here is quite tricky. The build + // calling this method is usually a build that is invoked from PRs, so it + // can't have secrets available to it (for the PR could modify + // build/teamcity-* to leak the secret). 
Instead, we provide the secrets + // to a higher-level job (Publish Bleeding Edge) and use TeamCity magic to + // pass that env var through when it's there. This means we won't have the + // env var on PR builds, but we'll have it for builds that are triggered + // from the release branches. + if os.Getenv("GITHUB_API_TOKEN") == "" { + return errors.New("GITHUB_API_TOKEN must be set") + } + githubpost.Post(githubPostFormatterName, bytes.NewReader(goTestJSONOutputFileBuf)) + } + return nil +} + +// createTarball converts the test json output file into output intended for human eyes +// and creates a tarball that contains the original json file and the converted +// human-readable file. +func createTarball(goTestJSONOutputFilePath string) error { + buf, err := os.ReadFile(goTestJSONOutputFilePath) + if err != nil { + return err + } + f, err := os.Create(filepath.Join(artifactsDir, "full_output.txt")) + if err != nil { + return err + } + if err := testfilter.FilterAndWrite(bytes.NewReader(buf), f, []string{"convert"}); err != nil { + return err + } + + tarArgs := []string{ + "--strip-components=1", + "-czf", + "full_output.tgz", + "full_output.txt", + filepath.Base(goTestJSONOutputFilePath), + } + createTarballCmd := exec.Command("tar", tarArgs...) + createTarballCmd.Dir = artifactsDir + var errBuf bytes.Buffer + createTarballCmd.Stderr = &errBuf + fmt.Println("running tar w/ args: ", shellescape.QuoteCommand(tarArgs)) + if err := createTarballCmd.Run(); err != nil { + return errors.Wrapf(err, "StdErr: %s", errBuf.String()) + } + if err := os.Remove(filepath.Join(artifactsDir, "full_output.txt")); err != nil { + fmt.Printf("Failed to remove full_output.txt - %v\n", err) + } + return nil +} + +// Some unit tests test automatic ballast creation. These ballasts can be +// larger than the maximum artifact size. Remove any artifacts with the +// EMERGENCY_BALLAST filename. 
+func removeEmergencyBallasts() { + findCmdArgs := []string{ + "-name", + artifactsDir, + "EMERGENCY_BALLAST", + "-delete", + } + findCmd := exec.Command("find", findCmdArgs...) + var errBuf bytes.Buffer + findCmd.Stderr = &errBuf + if err := findCmd.Run(); err != nil { + fmt.Println("running find w/ args: ", shellescape.QuoteCommand(findCmdArgs)) + fmt.Printf("Failed with err %v\nStdErr: %v", err, errBuf.String()) + } +} + +func processTestJSONIfNeeded(shouldCreateTarball bool, goTestJSONOutputFilePath string) error { + if !shouldProcessTestFailures { + return nil + } + removeEmergencyBallasts() + buf, err := os.ReadFile(goTestJSONOutputFilePath) + if err != nil { + return err + } + if err := processFailures(buf, filepath.Join(artifactsDir, "failures.txt")); err != nil { + return err + } + if err := postReleaseOnlyFailures(buf); err != nil { + return err + } + if shouldCreateTarball { + if err := createTarball(goTestJSONOutputFilePath); err != nil { + return err + } + } + return nil +} diff --git a/pkg/cmd/bazci/githubpost/BUILD.bazel b/pkg/cmd/bazci/githubpost/BUILD.bazel new file mode 100644 index 000000000000..1b872eb4dc22 --- /dev/null +++ b/pkg/cmd/bazci/githubpost/BUILD.bazel @@ -0,0 +1,32 @@ +load("//build/bazelutil/unused_checker:unused.bzl", "get_x_data") +load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") + +go_library( + name = "githubpost", + srcs = ["githubpost.go"], + importpath = "github.com/cockroachdb/cockroach/pkg/cmd/bazci/githubpost", + visibility = ["//visibility:public"], + deps = [ + "//pkg/cmd/internal/issues", + "//pkg/internal/codeowners", + "//pkg/internal/team", + "@com_github_cockroachdb_errors//:errors", + ], +) + +go_test( + name = "githubpost_test", + srcs = ["githubpost_test.go"], + args = ["-test.timeout=295s"], + data = glob(["testdata/**"]), + embed = [":githubpost"], + tags = ["broken_in_bazel"], + deps = [ + "//pkg/cmd/internal/issues", + "//pkg/testutils", + "@com_github_stretchr_testify//assert", + 
"@com_github_stretchr_testify//require", + ], +) + +get_x_data(name = "get_x_data") diff --git a/pkg/cmd/bazci/githubpost/githubpost.go b/pkg/cmd/bazci/githubpost/githubpost.go new file mode 100644 index 000000000000..e348128283f7 --- /dev/null +++ b/pkg/cmd/bazci/githubpost/githubpost.go @@ -0,0 +1,599 @@ +// Copyright 2016 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +// Command github-post parses the JSON-formatted output from a Go test session, +// as generated by either 'go test -json' or './pkg.test | go tool test2json -t', +// and posts issues for any failed tests to GitHub. If there are no failed +// tests, it assumes that there was a build error and posts the entire log to +// GitHub. 
+ +package githubpost + +import ( + "bufio" + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "log" + "os" + "os/exec" + "path/filepath" + "regexp" + "sort" + "strconv" + "strings" + "time" + + "github.com/cockroachdb/cockroach/pkg/cmd/internal/issues" + "github.com/cockroachdb/cockroach/pkg/internal/codeowners" + "github.com/cockroachdb/cockroach/pkg/internal/team" + "github.com/cockroachdb/errors" +) + +const ( + pkgEnv = "PKG" + unknown = "(unknown)" +) + +type formatter func(context.Context, failure) (issues.IssueFormatter, issues.PostRequest) + +func defaultFormatter(ctx context.Context, f failure) (issues.IssueFormatter, issues.PostRequest) { + teams := getOwner(ctx, f.packageName, f.testName) + repro := fmt.Sprintf("./dev test ./pkg/%s --race --stress -f %s TESTTIMEOUT=5m STRESSFLAGS='-timeout 5m' 2>&1", + trimPkg(f.packageName), f.testName) + + var projColID int + var mentions []string + if len(teams) > 0 { + projColID = teams[0].TriageColumnID + for _, team := range teams { + mentions = append(mentions, "@"+string(team.Name())) + } + } + return issues.UnitTestFormatter, issues.PostRequest{ + TestName: f.testName, + PackageName: f.packageName, + Message: f.testMessage, + Artifacts: "/", // best we can do for unit tests + HelpCommand: issues.UnitTestHelpCommand(repro), + MentionOnCreate: mentions, + ProjectColumnID: projColID, + } +} + +// Post parses the JSON-formatted output from a Go test session, +// as generated by either 'go test -json' or './pkg.test | go tool test2json -t', +// and posts issues for any failed tests to GitHub. If there are no failed +// tests, it assumes that there was a build error and posts the entire log to +// GitHub. 
+func Post(formatterName string, in io.Reader) { + var reqFromFailure formatter + switch formatterName { + case "pebble-metamorphic": + reqFromFailure = formatPebbleMetamorphicIssue + default: + reqFromFailure = defaultFormatter + } + + fileIssue := func(ctx context.Context, f failure) error { + fmter, req := reqFromFailure(ctx, f) + return issues.Post(ctx, fmter, req) + } + + ctx := context.Background() + if err := listFailures(ctx, in, fileIssue); err != nil { + log.Println(err) // keep going + } +} + +type failure struct { + title string + packageName string + testName string + testMessage string +} + +// This struct is described in the test2json documentation. +// https://golang.org/cmd/test2json/ +type testEvent struct { + Action string + Package string + Test string + Output string + Time time.Time // encodes as an RFC3339-format string + Elapsed float64 // seconds +} + +type scopedTest struct { + pkg string + name string +} + +func scoped(te testEvent) scopedTest { + if te.Package == "" { + return scopedTest{pkg: mustPkgFromEnv(), name: te.Test} + } + return scopedTest{pkg: te.Package, name: te.Test} +} + +func mustPkgFromEnv() string { + packageName := os.Getenv(pkgEnv) + if packageName == "" { + panic(errors.Errorf("package name environment variable %s is not set", pkgEnv)) + } + return packageName +} + +func maybeEnv(envKey, defaultValue string) string { + v := os.Getenv(envKey) + if v == "" { + return defaultValue + } + return v +} + +func shortPkg() string { + packageName := maybeEnv(pkgEnv, "unknown") + return trimPkg(packageName) +} + +func trimPkg(pkg string) string { + return strings.TrimPrefix(pkg, issues.CockroachPkgPrefix) +} + +func listFailures( + ctx context.Context, input io.Reader, fileIssue func(context.Context, failure) error, +) error { + // Tests that took less than this are not even considered for slow test + // reporting. This is so that we protect against large number of + // programmatically-generated subtests. 
+ const shortTestFilterSecs float64 = 0.5 + var timeoutMsg = "panic: test timed out after" + + var packageOutput bytes.Buffer + + // map from test name to list of events (each log line is an event, plus + // start and pass/fail events). + // Tests/events are "outstanding" until we see a final pass/fail event. + // Because of the way the go test runner prints output, in case a subtest times + // out or panics, we don't get a pass/fail event for sibling and ancestor + // tests. Those tests will remain "outstanding" and will be ignored for the + // purpose of issue reporting. + outstandingOutput := make(map[scopedTest][]testEvent) + failures := make(map[scopedTest][]testEvent) + var slowPassEvents []testEvent + var slowFailEvents []testEvent + + // init is true for the preamble of the input before the first "run" test + // event. + init := true + // trustTimestamps will be set if we don't find a marker suggesting that the + // input comes from a stress run. In that case, stress prints all its output + // at once (for a captured failed test run), so the test2json timestamps are + // meaningless. + trustTimestamps := true + // elapsedTotalSec accumulates the time spent in all tests, passing or + // failing. In case the input comes from a stress run, this will be used to + // deduce the duration of a timed out test. + var elapsedTotalSec float64 + // Will be set if the last test timed out. + var timedOutCulprit scopedTest + var timedOutEvent testEvent + var curTestStart time.Time + var last scopedTest + var lastEvent testEvent + scanner := bufio.NewScanner(input) + for scanner.Scan() { + var te testEvent + { + line := scanner.Text() // has no EOL marker + if len(line) <= 2 || line[0] != '{' || line[len(line)-1] != '}' { + // This line is not test2json output, skip it. This can happen if + // whatever feeds our input has some extra non-JSON lines such as + // would happen with `make` invocations. 
+ continue + } + if err := json.Unmarshal([]byte(line), &te); err != nil { + return errors.Wrapf(err, "unable to parse %q", line) + } + } + lastEvent = te + + if te.Test != "" { + init = false + } + if init && strings.Contains(te.Output, "-exec 'stress '") { + trustTimestamps = false + } + if timedOutCulprit.name == "" && te.Elapsed > 0 { + // We don't count subtests as those are counted in the parent. + if split := strings.SplitN(te.Test, "/", 2); len(split) == 1 { + elapsedTotalSec += te.Elapsed + } + } + + if timedOutCulprit.name == te.Test && te.Elapsed != 0 { + te.Elapsed = timedOutEvent.Elapsed + } + + // Events for the overall package test do not set Test. + if len(te.Test) > 0 { + switch te.Action { + case "run": + last = scoped(te) + if trustTimestamps { + curTestStart = te.Time + } + case "output": + key := scoped(te) + outstandingOutput[key] = append(outstandingOutput[key], te) + if strings.Contains(te.Output, timeoutMsg) { + timedOutCulprit = key + + // Fill in the Elapsed field for a timeout event. + // As of go1.11, the Elapsed field is bogus for fail events for timed + // out tests, so we do our own computation. + // See https://github.com/golang/go/issues/27568 + // + // Also, if the input is coming from stress, there will not even be a + // fail event for the test, so the Elapsed field computed here will be + // useful. + if trustTimestamps { + te.Elapsed = te.Time.Sub(curTestStart).Seconds() + } else { + // If we don't trust the timestamps, then we compute the test's + // duration by subtracting all the durations that we've seen so far + // (which we do trust to some extent). Note that this is not + // entirely accurate, since there's no information about the + // duration about sibling subtests which may have run. And further + // note that it doesn't work well at all for small timeouts because + // the resolution that the test durations have is just tens of + // milliseconds, so many quick tests are rounded of to a duration of + // 0. 
+ re := regexp.MustCompile(`panic: test timed out after (\d*(?:\.\d*)?)(.)`) + matches := re.FindStringSubmatch(te.Output) + if matches == nil { + log.Printf("failed to parse timeout message: %s", te.Output) + te.Elapsed = -1 + } else { + dur, err := strconv.ParseFloat(matches[1], 64) + if err != nil { + return err + } + if matches[2] == "m" { + // minutes to seconds + dur *= 60 + } else if matches[2] != "s" { + return fmt.Errorf("unexpected time unit in: %s", te.Output) + } + te.Elapsed = dur - elapsedTotalSec + } + } + timedOutEvent = te + } + case "pass", "skip": + if timedOutCulprit.name != "" { + // NB: we used to do this: + // panic(fmt.Sprintf("detected test timeout but test seems to have passed (%+v)", te)) + // but it would get hit. There is no good way to + // blame a timeout on a particular test. We should probably remove this + // logic in the first place, but for now make sure we don't panic as + // a result of it. + timedOutCulprit = scopedTest{} + } + delete(outstandingOutput, scoped(te)) + if te.Elapsed > shortTestFilterSecs { + // We ignore subtests; their time contributes to the parent's. + if !strings.Contains(te.Test, "/") { + slowPassEvents = append(slowPassEvents, te) + } + } + case "fail": + key := scoped(te) + // Record slow tests. We ignore subtests; their time contributes to the + // parent's. Except the timed out (sub)test, for which the parent (if + // any) is not going to appear in the report because there's not going + // to be a pass/fail event for it. + if !strings.Contains(te.Test, "/") || timedOutCulprit == key { + slowFailEvents = append(slowFailEvents, te) + } + // Move the test to the failures collection unless the test timed out. + // We have special reporting for timeouts below. + if timedOutCulprit != key { + failures[key] = outstandingOutput[key] + } + delete(outstandingOutput, key) + } + } else if te.Action == "output" { + // Output was outside the context of a test. 
This consists mostly of the + // preamble and epilogue that Make outputs, but also any log messages that + // are printed by a test binary's main function. + packageOutput.WriteString(te.Output) + } + } + + // On timeout, we might or might not have gotten a fail event for the timed + // out test (we seem to get one when processing output from a test binary run, + // but not when processing the output of `stress`, which adds some lines at + // the end). If we haven't gotten a fail event, the test's output is still + // outstanding and the test is not registered in the slowFailEvents + // collection. The timeout handling code below relies on slowFailEvents not + // being empty though, so we'll process the test here. + if timedOutCulprit.name != "" { + if _, ok := outstandingOutput[timedOutCulprit]; ok { + slowFailEvents = append(slowFailEvents, timedOutEvent) + delete(outstandingOutput, timedOutCulprit) + } + } else { + // If we haven't received a final event for the last test, then a + // panic/log.Fatal must have happened. Consider it failed. + // Note that because of https://github.com/golang/go/issues/27582 there + // might be other outstanding tests; we ignore those. + if _, ok := outstandingOutput[last]; ok { + log.Printf("found outstanding output. Considering last test failed: %s", last) + failures[last] = outstandingOutput[last] + } + } + + // test2json always puts a fail event last unless it sees a big pass message + // from the test output. + if lastEvent.Action == "fail" && len(failures) == 0 && timedOutCulprit.name == "" { + // If we couldn't find a failing Go test, assume that a failure occurred + // before running Go and post an issue about that. 
+ err := fileIssue(ctx, failure{ + title: fmt.Sprintf("%s: package failed", shortPkg()), + packageName: maybeEnv(pkgEnv, "unknown"), + testName: unknown, + testMessage: packageOutput.String(), + }) + if err != nil { + return errors.Wrap(err, "failed to post issue") + } + } else { + for test, testEvents := range failures { + if split := strings.SplitN(test.name, "/", 2); len(split) == 2 { + parentTest, subTest := scopedTest{pkg: test.pkg, name: split[0]}, scopedTest{pkg: test.pkg, name: split[1]} + log.Printf("consolidating failed subtest %q into parent test %q", subTest.name, parentTest.name) + failures[parentTest] = append(failures[parentTest], testEvents...) + delete(failures, test) + } else { + log.Printf("failed parent test %q (no subtests)", test.name) + if _, ok := failures[test]; !ok { + return errors.AssertionFailedf("expected %q in 'failures'", test.name) + } + } + } + // Sort the failed tests to make the unit tests for this script deterministic. + var failedTestNames []scopedTest + for name := range failures { + failedTestNames = append(failedTestNames, name) + } + sort.Slice(failedTestNames, func(i, j int) bool { + return fmt.Sprint(failedTestNames[i]) < fmt.Sprint(failedTestNames[j]) + }) + for _, test := range failedTestNames { + testEvents := failures[test] + var outputs []string + for _, testEvent := range testEvents { + outputs = append(outputs, testEvent.Output) + } + err := fileIssue(ctx, failure{ + title: fmt.Sprintf("%s: %s failed", trimPkg(test.pkg), test.name), + packageName: test.pkg, + testName: test.name, + testMessage: strings.Join(outputs, ""), + }) + if err != nil { + return errors.Wrap(err, "failed to post issue") + } + } + } + + // Sort slow tests descendingly by duration. 
+ sort.Slice(slowPassEvents, func(i, j int) bool { + return slowPassEvents[i].Elapsed > slowPassEvents[j].Elapsed + }) + sort.Slice(slowFailEvents, func(i, j int) bool { + return slowFailEvents[i].Elapsed > slowFailEvents[j].Elapsed + }) + + report := genSlowTestsReport(slowPassEvents, slowFailEvents) + if err := writeSlowTestsReport(report); err != nil { + log.Printf("failed to create slow tests report: %s", err) + } + + // If the run timed out, file an issue. A couple of cases: + // 1) If the test that was running when the package timed out is the longest + // test, then we blame it. The common case is the test deadlocking - it would + // have run forever. + // 2) Otherwise, we don't blame anybody in particular. We file a generic issue + // listing the package name containing the report of long-running tests. + if timedOutCulprit.name != "" { + slowest := slowFailEvents[0] + if len(slowPassEvents) > 0 && slowPassEvents[0].Elapsed > slowest.Elapsed { + slowest = slowPassEvents[0] + } + + culpritOwner := fmt.Sprintf("%v", getOwner(ctx, timedOutCulprit.pkg, timedOutCulprit.name)) + slowEvents := append(slowFailEvents, slowPassEvents...) + // The predicate determines if the union of the slow events is owned by the _same_ team(s) as timedOutCulprit. + hasSameOwner := func() bool { + for _, slowEvent := range slowEvents { + scopedEvent := scoped(slowEvent) + owner := fmt.Sprintf("%v", getOwner(ctx, scopedEvent.pkg, scopedEvent.name)) + + if culpritOwner != owner { + log.Printf("%v has a different owner: %s;bailing out...\n", scopedEvent, owner) + return false + } + } + return true + } + if timedOutCulprit == scoped(slowest) || hasSameOwner() { + // The test that was running when the timeout hit is either the one that ran for + // the longest time or all other tests share the same owner. + // The test that was running when the timeout hit is the one that ran for + // the longest time. 
+ log.Printf("timeout culprit found: %s\n", timedOutCulprit.name) + err := fileIssue(ctx, failure{ + title: fmt.Sprintf("%s: %s timed out", trimPkg(timedOutCulprit.pkg), timedOutCulprit.name), + packageName: timedOutCulprit.pkg, + testName: timedOutCulprit.name, + testMessage: report, + }) + if err != nil { + return errors.Wrap(err, "failed to post issue") + } + } else { + log.Printf("timeout culprit not found\n") + // TODO(irfansharif): These are assigned to nobody given our lack of + // a story around #51653. It'd be nice to be able to go from pkg + // name to team-name, and be able to assign to a specific team. + err := fileIssue(ctx, failure{ + title: fmt.Sprintf("%s: package timed out", shortPkg()), + packageName: maybeEnv(pkgEnv, "unknown"), + testName: unknown, + testMessage: report, + }) + if err != nil { + return errors.Wrap(err, "failed to post issue") + } + } + } + + return nil +} + +func genSlowTestsReport(slowPassingTests, slowFailingTests []testEvent) string { + var b strings.Builder + b.WriteString("Slow failing tests:\n") + for i, te := range slowFailingTests { + if i == 20 { + break + } + fmt.Fprintf(&b, "%s - %.2fs\n", te.Test, te.Elapsed) + } + if len(slowFailingTests) == 0 { + fmt.Fprint(&b, "\n") + } + + b.WriteString("\nSlow passing tests:\n") + for i, te := range slowPassingTests { + if i == 20 { + break + } + fmt.Fprintf(&b, "%s - %.2fs\n", te.Test, te.Elapsed) + } + if len(slowPassingTests) == 0 { + fmt.Fprint(&b, "\n") + } + return b.String() +} + +func writeSlowTestsReport(report string) error { + return os.WriteFile("artifacts/slow-tests-report.txt", []byte(report), 0644) +} + +// getFileLine returns the file (relative to repo root) and line for the given test. +// The package name is assumed relative to the repo root as well, i.e. pkg/foo/bar. 
+func getFileLine( + ctx context.Context, packageName, testName string, +) (_filename string, _linenum string, _ error) { + // Search the source code for the email address of the last committer to touch + // the first line of the source code that contains testName. Then, ask GitHub + // for the GitHub username of the user with that email address by searching + // commits in cockroachdb/cockroach for commits authored by the address. + subtests := strings.Split(testName, "/") + testName = subtests[0] + packageName = strings.TrimPrefix(packageName, "github.com/cockroachdb/cockroach/") + for { + if !strings.Contains(packageName, "pkg") { + return "", "", errors.Newf("could not find test %s", testName) + } + cmd := exec.Command(`/bin/bash`, `-c`, + fmt.Sprintf(`cd "$(git rev-parse --show-toplevel)" && git grep -n 'func %s(' '%s/*_test.go'`, + testName, packageName)) + // This command returns output such as: + // ../ccl/storageccl/export_test.go:31:func TestExportCmd(t *testing.T) { + out, err := cmd.CombinedOutput() + if err != nil { + fmt.Printf("couldn't find test %s in %s: %s %+v\n", + testName, packageName, string(out), err) + packageName = filepath.Dir(packageName) + continue + } + re := regexp.MustCompile(`(.*):(.*):`) + // The first 2 :-delimited fields are the filename and line number. + matches := re.FindSubmatch(out) + if matches == nil { + fmt.Printf("couldn't find filename/line number for test %s in %s: %s %+v\n", + testName, packageName, string(out), err) + packageName = filepath.Dir(packageName) + continue + } + return string(matches[1]), string(matches[2]), nil + } +} + +// getOwner looks up the file containing the given test and returns +// the owning teams. It does not return +// errors, but instead simply returns what it can. +// In case no owning team is found, "test-eng" team is returned. 
+func getOwner(ctx context.Context, packageName, testName string) (_teams []team.Team) { + filename, _, err := getFileLine(ctx, packageName, testName) + if err != nil { + log.Printf("errror getting file:line for %s.%s: %s", packageName, testName, err) + // Let's continue so that we can assign the "catch-all" owner. + } + co, err := codeowners.DefaultLoadCodeOwners() + if err != nil { + log.Printf("loading codeowners: %s", err) + return nil + } + match := co.Match(filename) + + if match == nil { + // N.B. if no owning team is found, we default to 'test-eng'. This should be a rare exception rather than the rule. + testEng := co.GetTeamForAlias("cockroachdb/test-eng") + if testEng.Name() == "" { + log.Fatalf("test-eng team could not be found in TEAMS.yaml") + } + log.Printf("assigning %s.%s to 'test-eng' as catch-all", packageName, testName) + match = []team.Team{testEng} + } + return match +} + +func formatPebbleMetamorphicIssue( + ctx context.Context, f failure, +) (issues.IssueFormatter, issues.PostRequest) { + var repro string + { + const seedHeader = "===== SEED =====\n" + i := strings.Index(f.testMessage, seedHeader) + if i != -1 { + s := f.testMessage[i+len(seedHeader):] + s = strings.TrimSpace(s) + s = strings.TrimSpace(s[:strings.Index(s, "\n")]) + repro = fmt.Sprintf("go test -mod=vendor -tags 'invariants' -exec 'stress -p 1' "+ + `-timeout 0 -test.v -run TestMeta$ ./internal/metamorphic -seed %s -ops "uniform:5000-10000"`, s) + } + } + return issues.UnitTestFormatter, issues.PostRequest{ + TestName: f.testName, + PackageName: f.packageName, + Message: f.testMessage, + Artifacts: "meta", + HelpCommand: issues.ReproductionCommandFromString(repro), + ExtraLabels: []string{"metamorphic-failure"}, + } +} diff --git a/pkg/cmd/github-post/main_test.go b/pkg/cmd/bazci/githubpost/githubpost_test.go similarity index 99% rename from pkg/cmd/github-post/main_test.go rename to pkg/cmd/bazci/githubpost/githubpost_test.go index ea3ccd71bd79..65b70f66f129 100644 --- 
a/pkg/cmd/github-post/main_test.go +++ b/pkg/cmd/bazci/githubpost/githubpost_test.go @@ -8,7 +8,7 @@ // by the Apache License, Version 2.0, included in the file // licenses/APL.txt. -package main +package githubpost import ( "context" diff --git a/pkg/cmd/github-post/testdata/README b/pkg/cmd/bazci/githubpost/testdata/README similarity index 100% rename from pkg/cmd/github-post/testdata/README rename to pkg/cmd/bazci/githubpost/testdata/README diff --git a/pkg/cmd/github-post/testdata/implicit-pkg.json b/pkg/cmd/bazci/githubpost/testdata/implicit-pkg.json similarity index 100% rename from pkg/cmd/github-post/testdata/implicit-pkg.json rename to pkg/cmd/bazci/githubpost/testdata/implicit-pkg.json diff --git a/pkg/cmd/github-post/testdata/pebble-metamorphic-panic.json b/pkg/cmd/bazci/githubpost/testdata/pebble-metamorphic-panic.json similarity index 100% rename from pkg/cmd/github-post/testdata/pebble-metamorphic-panic.json rename to pkg/cmd/bazci/githubpost/testdata/pebble-metamorphic-panic.json diff --git a/pkg/cmd/github-post/testdata/stress-failure.json b/pkg/cmd/bazci/githubpost/testdata/stress-failure.json similarity index 100% rename from pkg/cmd/github-post/testdata/stress-failure.json rename to pkg/cmd/bazci/githubpost/testdata/stress-failure.json diff --git a/pkg/cmd/github-post/testdata/stress-fatal.json b/pkg/cmd/bazci/githubpost/testdata/stress-fatal.json similarity index 100% rename from pkg/cmd/github-post/testdata/stress-fatal.json rename to pkg/cmd/bazci/githubpost/testdata/stress-fatal.json diff --git a/pkg/cmd/github-post/testdata/stress-init-panic.json b/pkg/cmd/bazci/githubpost/testdata/stress-init-panic.json similarity index 100% rename from pkg/cmd/github-post/testdata/stress-init-panic.json rename to pkg/cmd/bazci/githubpost/testdata/stress-init-panic.json diff --git a/pkg/cmd/github-post/testdata/stress-panic.json b/pkg/cmd/bazci/githubpost/testdata/stress-panic.json similarity index 100% rename from 
pkg/cmd/github-post/testdata/stress-panic.json rename to pkg/cmd/bazci/githubpost/testdata/stress-panic.json diff --git a/pkg/cmd/github-post/testdata/stress-subtests.json b/pkg/cmd/bazci/githubpost/testdata/stress-subtests.json similarity index 100% rename from pkg/cmd/github-post/testdata/stress-subtests.json rename to pkg/cmd/bazci/githubpost/testdata/stress-subtests.json diff --git a/pkg/cmd/github-post/testdata/stress-timeout-culprit-found.json b/pkg/cmd/bazci/githubpost/testdata/stress-timeout-culprit-found.json similarity index 100% rename from pkg/cmd/github-post/testdata/stress-timeout-culprit-found.json rename to pkg/cmd/bazci/githubpost/testdata/stress-timeout-culprit-found.json diff --git a/pkg/cmd/github-post/testdata/stress-timeout-culprit-not-found.json b/pkg/cmd/bazci/githubpost/testdata/stress-timeout-culprit-not-found.json similarity index 100% rename from pkg/cmd/github-post/testdata/stress-timeout-culprit-not-found.json rename to pkg/cmd/bazci/githubpost/testdata/stress-timeout-culprit-not-found.json diff --git a/pkg/cmd/github-post/testdata/stress-unknown.json b/pkg/cmd/bazci/githubpost/testdata/stress-unknown.json similarity index 100% rename from pkg/cmd/github-post/testdata/stress-unknown.json rename to pkg/cmd/bazci/githubpost/testdata/stress-unknown.json diff --git a/pkg/cmd/github-post/testdata/timeout-culprit-found.json b/pkg/cmd/bazci/githubpost/testdata/timeout-culprit-found.json similarity index 100% rename from pkg/cmd/github-post/testdata/timeout-culprit-found.json rename to pkg/cmd/bazci/githubpost/testdata/timeout-culprit-found.json diff --git a/pkg/cmd/github-post/testdata/timeout-culprit-not-found.json b/pkg/cmd/bazci/githubpost/testdata/timeout-culprit-not-found.json similarity index 100% rename from pkg/cmd/github-post/testdata/timeout-culprit-not-found.json rename to pkg/cmd/bazci/githubpost/testdata/timeout-culprit-not-found.json diff --git a/pkg/cmd/bazci/testfilter/BUILD.bazel b/pkg/cmd/bazci/testfilter/BUILD.bazel new 
file mode 100644 index 000000000000..fe935fd8abb5 --- /dev/null +++ b/pkg/cmd/bazci/testfilter/BUILD.bazel @@ -0,0 +1,25 @@ +load("//build/bazelutil/unused_checker:unused.bzl", "get_x_data") +load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") + +go_library( + name = "testfilter", + srcs = ["testfilter.go"], + importpath = "github.com/cockroachdb/cockroach/pkg/cmd/bazci/testfilter", + visibility = ["//visibility:public"], + deps = ["@org_golang_x_sync//errgroup"], +) + +go_test( + name = "testfilter_test", + srcs = ["testfilter_test.go"], + args = ["-test.timeout=295s"], + data = glob(["testdata/**"]), + embed = [":testfilter"], + deps = [ + "//pkg/testutils", + "//pkg/util/leaktest", + "@com_github_cockroachdb_datadriven//:datadriven", + ], +) + +get_x_data(name = "get_x_data") diff --git a/pkg/cmd/testfilter/testdata/convert.txt b/pkg/cmd/bazci/testfilter/testdata/convert.txt similarity index 100% rename from pkg/cmd/testfilter/testdata/convert.txt rename to pkg/cmd/bazci/testfilter/testdata/convert.txt diff --git a/pkg/cmd/testfilter/testdata/omit.txt b/pkg/cmd/bazci/testfilter/testdata/omit.txt similarity index 100% rename from pkg/cmd/testfilter/testdata/omit.txt rename to pkg/cmd/bazci/testfilter/testdata/omit.txt diff --git a/pkg/cmd/testfilter/testdata/strip.txt b/pkg/cmd/bazci/testfilter/testdata/strip.txt similarity index 100% rename from pkg/cmd/testfilter/testdata/strip.txt rename to pkg/cmd/bazci/testfilter/testdata/strip.txt diff --git a/pkg/cmd/bazci/testfilter/testfilter.go b/pkg/cmd/bazci/testfilter/testfilter.go new file mode 100644 index 000000000000..93ad8cd1d8fd --- /dev/null +++ b/pkg/cmd/bazci/testfilter/testfilter.go @@ -0,0 +1,427 @@ +// Copyright 2019 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. 
+// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +// testfilter is a utility to manipulate JSON streams in [test2json] format. +// Standard input is read and each line starting with `{` and ending with `}` +// parsed (and expected to parse successfully). Lines not matching this pattern +// are classified as output not related to the test and, depending on the args +// passed to `testfilter`, are passed through or removed. The arguments available +// are `--mode=(strip|omit|convert)`, where: +// +// strip: omit output for non-failing tests, pass everything else through. In +// +// particular, non-test output and tests that never terminate are passed through. +// +// omit: print only failing tests. Note that test2json does not close scopes for +// +// tests that are running in parallel (in the same package) with a "foreground" +// test that panics, so it will pass through *only* the one foreground test. +// Note also that package scopes are omitted; test2json does not reliably close +// them on panic/Exit anyway. +// +// convert: +// +// no filtering is performed, but any test2json input is translated back into +// its pure Go test framework text representation. This is useful for output +// intended for human eyes. 
+// +// [test2json]: https://golang.org/cmd/test2json/ + +package testfilter + +import ( + "bufio" + "encoding/json" + "fmt" + "io" + "strings" + "time" + + "golang.org/x/sync/errgroup" +) + +const ( + // omit output for non-failing tests, but print run/pass/skip events for all tests + modeStrip = "strip" + // only emit failing tests + modeOmit = "omit" + // don't perform any filtering, simply convert the json back to original test format + modeConvert = "convert" +) + +type testEvent struct { + Time time.Time // encodes as an RFC3339-format string + Action string + Package string + Test string + Elapsed float64 // seconds + Output string +} + +func (t *testEvent) json() string { + j, err := json.Marshal(t) + if err != nil { + panic(err) + } + return string(j) +} + +const packageLevelTestName = "PackageLevel" + +// tup identifies a single test. +type tup struct { + pkg string + test string +} + +type ent struct { + first, last string // RUN and (SKIP|PASS|FAIL) + strings.Builder // output + + // The following fields are set for package-level entries. + numActiveTests int // number of tests currently running in package. + numTestsFailed int // number of tests failed so far in package. +} + +// FilterAndWrite manipulates JSON streams in [test2json] format. 
+func FilterAndWrite(in io.Reader, out io.Writer, modesSequence []string) error { + type pipe struct { + reader io.Reader + writer io.Writer + } + pipesArray := make([]pipe, len(modesSequence)) + for idx := range modesSequence { + if idx == 0 { + pipesArray[idx].reader = in + } + if idx == len(modesSequence)-1 { + pipesArray[idx].writer = out + } else { + reader, writer := io.Pipe() + pipesArray[idx].writer = writer + pipesArray[idx+1].reader = reader + } + } + var eg errgroup.Group + for idx, mode := range modesSequence { + idx := idx + mode := mode + eg.Go(func() error { + if err := filter(pipesArray[idx].reader, pipesArray[idx].writer, mode); err != nil { + return err + } + if idx != 0 { + pipe := pipesArray[idx].reader.(*io.PipeReader) + if err := pipe.Close(); err != nil { + return err + } + } + if idx != len(modesSequence)-1 { + pipe := pipesArray[idx].writer.(*io.PipeWriter) + if err := pipe.Close(); err != nil { + return err + } + } + return nil + }) + } + return eg.Wait() +} + +func filter(in io.Reader, out io.Writer, mode string) error { + scanner := bufio.NewScanner(in) + m := map[tup]*ent{} + ev := &testEvent{} + var n int // number of JSON lines parsed + var passFailLine string // catch common error of piping non-json test output in + for scanner.Scan() { + line := scanner.Text() // has no EOL marker + if len(line) <= 2 || line[0] != '{' || line[len(line)-1] != '}' { + // Not test2json output, pass it through except in `omit` mode. + // It's important that we still see build errors etc when running + // in -mode=strip. 
+ if passFailLine == "" && (strings.Contains(line, "PASS") || strings.Contains(line, "FAIL")) { + passFailLine = line + } + if mode != modeOmit { + fmt.Fprintln(out, line) + } + continue + } + *ev = testEvent{} + if err := json.Unmarshal([]byte(line), ev); err != nil { + return err + } + n++ + + if mode == modeConvert { + if ev.Action == "output" { + fmt.Fprint(out, ev.Output) + } + continue + } + + if ev.Test == "" { + // This is a package-level message. + + // Populate a fake test name. We need this because TC is blind + // to anything without a "Test" field. + ev.Test = packageLevelTestName + pkey := tup{ev.Package, ev.Test} + + switch ev.Action { + case "fail": + buf := m[pkey] + + // Is the package failing with some non-terminated tests? + hasOpenSubTests := buf != nil && buf.numActiveTests > 0 + + // Did the package contain any failing tests? + hadSomeFailedTests := buf != nil && buf.numTestsFailed > 0 + + // If the package is failing without non-terminated tests, + // but it contained some failing tests, then we rely on these + // tests' output to explain what happened to the user. In that + // case, we are happy to ignore the package-level output. + // + // (If the package did not have any failing tests at all, we + // still want some package-level output: in that case, if it + // fails we want some details about that below. If there was + // any non-terminating test, we also mandate a package-level + // result, which will contain the list of non-terminating + // tests.) + if !hasOpenSubTests && hadSomeFailedTests { + delete(m, pkey) + continue + } + } + + // At this point, either: + // - we are starting to see output for a package which + // has not yet terminated processing; or + // - we are seeing the last pass/fail entry for a package, + // and there is something "interesting" to report + // for this package. + // + // In both cases, we are going to emit a test result for the + // package itself in the common output processing case below. 
+ // For this to be valid/possible, we first need to ensure + // the map contains a package-level entry. + if err := ensurePackageEntry(m, out, ev, pkey, mode); err != nil { + return err + } + + // At this point, either we are still processing a package's + // output before it completes, or the last package event. + + const helpMessage = ` +Check full_output.txt in artifacts for stray panics or other errors that broke +the test process. Note that you might be looking at this message from a parent +CI job. To reliably get to the "correct" job, click the drop-down next to +"PackageLevel" above, then "Show in build log", and then navigate to the +artifacts tab. See: + +https://user-images.githubusercontent.com/5076964/110923167-e2ab4780-8320-11eb-8fba-99da632aa814.png +https://user-images.githubusercontent.com/5076964/110923299-08d0e780-8321-11eb-91af-f4eedcf8bacb.png + +for details. +` + if ev.Action != "output" { + // This is the final event for the package. + // + // Dump all the test scopes so far in this package, then + // forget about them. This ensures that the test scopes are + // closed before the package scope is closed the final output. + var testReport strings.Builder + for key := range m { + if key.pkg == ev.Package && key.test != ev.Test { + // We only mention the test scopes without their sub-tests; + // otherwise we could get tens of thousands of output lines + // for a failed logic test run due to a panic. + if strings.Contains(key.test, "/") { + // Sub-test. Just forget all about it. + delete(m, key) + } else { + // Not a sub-test. + + // Remember the test's name to report in the + // package-level output. + testReport.WriteString("\n" + key.test) + + // Synthesize a "skip" message. We want "something" (and + // not nothing) so that we get some timing information + // in strip mode. 
+ // + // We use "skip" and not "fail" to ensure that no issue + // gets filed for the open-ended tests by the github + // auto-poster: we don't have confidence for any of them + // that they are the particular cause of the failure. + syntheticSkipEv := testEvent{ + Time: ev.Time, + Action: "skip", + Package: ev.Package, + Test: key.test, + Elapsed: 0, + Output: "unfinished due to package-level failure" + helpMessage, + } + // Translate the synthetic message back into an output line. + syntheticLine := syntheticSkipEv.json() + if err := processTestEvent(m, out, &syntheticSkipEv, syntheticLine, mode); err != nil { + return err + } + } + } + } + + // If the package is failing, tell the user that something was amiss. + if ev.Action == "fail" { + ev.Output += helpMessage + if testReport.Len() > 0 { + ev.Output += "\nThe following tests have not completed and could be the cause of the failure:" + testReport.String() + } + } + } + + // Re-populate the line from the JSON payload for the + // PackageLevel pseudo-test. + line = ev.json() + } + + // Common output processing. + if err := processTestEvent(m, out, ev, line, mode); err != nil { + return err + } + } + // Some scopes might still be open. To the best of my knowledge, + // this is due to a panic/premature exit of a single-package test + // binary. In that case, it seems that neither is the package scope + // closed, nor the scopes for any tests that were running in + // parallel, so we pass that through if stripping, but not when + // omitting. + if mode == modeStrip { + for key := range m { + buf := m[key] + // Skip over the package-level pseudo-entries. Since we're + // single-package, the remainder of the output is sufficient + // here. + if key.test == packageLevelTestName { + continue + } + fmt.Fprintln(out, buf.String()) + } + } + // TODO(tbg): would like to return an error here for sanity, but the + // JSON just isn't well-formed all the time. 
For example, at the time + // of writing, here's a repro: + // make benchshort PKG=./pkg/bench BENCHES=BenchmarkIndexJoin 2>&1 | \ + // testfilter -mode=strip + // Interestingly it works once we remove the `log.Scope(b).Close` in + // that test. Adding TESTFLAGS=-v doesn't matter apparently. + // if len(m) != 0 { + // return fmt.Errorf("%d tests did not terminate (a package likely exited prematurely)", len(m)) + // } + if mode != modeConvert && n == 0 && passFailLine != "" { + // Without this, if the input to this command wasn't even JSON, we would + // pass. That's a mistake we should avoid at all costs. Note that even + // `go test -run - ./some/pkg` produces n>0 due to the start/pass events + // for the package, so if we're here then 100% something weird is going + // on. + return fmt.Errorf("not a single test was parsed, but detected test output: %s", passFailLine) + } + return nil +} + +// ensurePackageEntry ensures there is a package-level entry in the +// map for each package. +// +// This is necessary because we want to consider package-level +// results as regular test results. To achieve this +// successfully, we need to understand how TC processes tests. +// +// TC is a bit peculiar and requires all tests to *start* with +// an event with action "run", then zero or more "output" +// actions, then one of either "pass", "skip" or "fail". +// +// Unfortunately, `go test` does not emit initial "run" +// entries for package-level outputs. This is arguably a +// bug. Instead it starts directly with either "output" or +// "pass"/"fail" at the end. This prevents TC from treating +// the package as a test. To fix that, we insert a synthetic +// "run" entry for the package in the map here. +func ensurePackageEntry(m map[tup]*ent, out io.Writer, ev *testEvent, pkey tup, mode string) error { + if buf := m[pkey]; buf != nil { + return nil + } + // Package not known yet. Synthesize an entry. 
+ packageEvent := *ev + packageEvent.Test = packageLevelTestName + packageEvent.Action = "run" + packageEvent.Output = "" + packageLine := packageEvent.json() + return processTestEvent(m, out, &packageEvent, packageLine, mode) +} + +func processTestEvent( + m map[tup]*ent, out io.Writer, ev *testEvent, line string, mode string, +) error { + // The package key. + pkey := tup{ev.Package, packageLevelTestName} + // The test's key. + key := tup{ev.Package, ev.Test} + + // Is this a regular test? In that case, ensure there is a + // package-level entry for this test. + if ev.Test != packageLevelTestName { + if err := ensurePackageEntry(m, out, ev, pkey, mode); err != nil { + return err + } + } + + // Now process the test itself. + buf := m[key] + if buf == nil { + buf = &ent{first: line} + m[key] = buf + if key != pkey { + // Remember how many tests we're seeing. + m[pkey].numActiveTests++ + } + } + if _, err := fmt.Fprintln(buf, line); err != nil { + return err + } + switch ev.Action { + case "pass", "skip", "fail": + buf.last = line + if ev.Action == "fail" { + fmt.Fprint(out, buf.String()) + } else if mode == modeStrip { + // Output only the start and end of test so that we preserve the + // timing information. However, the output is omitted. + fmt.Fprintln(out, buf.first) + fmt.Fprintln(out, buf.last) + } + + // Forget the test. + delete(m, key) + if key != pkey { + m[pkey].numActiveTests-- + if ev.Action == "fail" { + m[pkey].numTestsFailed++ + } + } + + case "run", "pause", "cont", "bench", "output": + default: + // We must have parsed some JSON that wasn't a testEvent. 
+ return fmt.Errorf("unknown input: %s", line) + } + return nil +} diff --git a/pkg/cmd/testfilter/main_test.go b/pkg/cmd/bazci/testfilter/testfilter_test.go similarity index 84% rename from pkg/cmd/testfilter/main_test.go rename to pkg/cmd/bazci/testfilter/testfilter_test.go index 853c8fe8c3a9..85bc16acb05b 100644 --- a/pkg/cmd/testfilter/main_test.go +++ b/pkg/cmd/bazci/testfilter/testfilter_test.go @@ -8,7 +8,7 @@ // by the Apache License, Version 2.0, included in the file // licenses/APL.txt. -package main +package testfilter import ( "strings" @@ -19,18 +19,13 @@ import ( "github.com/cockroachdb/datadriven" ) -func TestFilter(t *testing.T) { +func TestFilterAndWrite(t *testing.T) { defer leaktest.AfterTest(t)() - datadriven.Walk(t, testutils.TestDataPath(t), func(t *testing.T, path string) { datadriven.RunTest(t, path, func(t *testing.T, td *datadriven.TestData) string { in := strings.NewReader(td.Input) var out strings.Builder - var mode modeT - if err := mode.Set(td.Cmd); err != nil { - return err.Error() - } - if err := filter(in, &out, mode); err != nil { + if err := FilterAndWrite(in, &out, []string{td.Cmd}); err != nil { return err.Error() } // At the time of writing, datadriven garbles the test files when diff --git a/pkg/cmd/github-post/BUILD.bazel b/pkg/cmd/github-post/BUILD.bazel index 033902f1ece1..10dfbd1f70d9 100644 --- a/pkg/cmd/github-post/BUILD.bazel +++ b/pkg/cmd/github-post/BUILD.bazel @@ -1,17 +1,12 @@ load("//build/bazelutil/unused_checker:unused.bzl", "get_x_data") -load("@io_bazel_rules_go//go:def.bzl", "go_binary", "go_library", "go_test") +load("@io_bazel_rules_go//go:def.bzl", "go_binary", "go_library") go_library( name = "github-post_lib", srcs = ["main.go"], importpath = "github.com/cockroachdb/cockroach/pkg/cmd/github-post", visibility = ["//visibility:private"], - deps = [ - "//pkg/cmd/internal/issues", - "//pkg/internal/codeowners", - "//pkg/internal/team", - "@com_github_cockroachdb_errors//:errors", - ], + deps = 
["//pkg/cmd/bazci/githubpost"], ) go_binary( @@ -20,20 +15,4 @@ go_binary( visibility = ["//visibility:public"], ) -go_test( - name = "github-post_test", - size = "small", - srcs = ["main_test.go"], - args = ["-test.timeout=55s"], - data = glob(["testdata/**"]), - embed = [":github-post_lib"], - tags = ["broken_in_bazel"], - deps = [ - "//pkg/cmd/internal/issues", - "//pkg/testutils", - "@com_github_stretchr_testify//assert", - "@com_github_stretchr_testify//require", - ], -) - get_x_data(name = "get_x_data") diff --git a/pkg/cmd/github-post/main.go b/pkg/cmd/github-post/main.go index 5d382d73f875..67437edfafb7 100644 --- a/pkg/cmd/github-post/main.go +++ b/pkg/cmd/github-post/main.go @@ -16,580 +16,15 @@ package main import ( - "bufio" - "bytes" - "context" - "encoding/json" "flag" - "fmt" - "io" - "log" "os" - "os/exec" - "path/filepath" - "regexp" - "sort" - "strconv" - "strings" - "time" - "github.com/cockroachdb/cockroach/pkg/cmd/internal/issues" - "github.com/cockroachdb/cockroach/pkg/internal/codeowners" - "github.com/cockroachdb/cockroach/pkg/internal/team" - "github.com/cockroachdb/errors" + "github.com/cockroachdb/cockroach/pkg/cmd/bazci/githubpost" ) -const ( - pkgEnv = "PKG" - unknown = "(unknown)" -) - -type formatter func(context.Context, failure) (issues.IssueFormatter, issues.PostRequest) - -func defaultFormatter(ctx context.Context, f failure) (issues.IssueFormatter, issues.PostRequest) { - teams := getOwner(ctx, f.packageName, f.testName) - repro := fmt.Sprintf("./dev test ./pkg/%s --race --stress -f %s TESTTIMEOUT=5m STRESSFLAGS='-timeout 5m' 2>&1", - trimPkg(f.packageName), f.testName) - - var projColID int - var mentions []string - if len(teams) > 0 { - projColID = teams[0].TriageColumnID - for _, team := range teams { - mentions = append(mentions, "@"+string(team.Name())) - } - } - return issues.UnitTestFormatter, issues.PostRequest{ - TestName: f.testName, - PackageName: f.packageName, - Message: f.testMessage, - Artifacts: "/", // best we 
can do for unit tests - HelpCommand: issues.UnitTestHelpCommand(repro), - MentionOnCreate: mentions, - ProjectColumnID: projColID, - } -} - func main() { formatterName := flag.String("formatter", "", "formatter to use to construct GitHub issues") flag.Parse() - var reqFromFailure formatter - switch *formatterName { - case "pebble-metamorphic": - reqFromFailure = formatPebbleMetamorphicIssue - default: - reqFromFailure = defaultFormatter - } - - fileIssue := func(ctx context.Context, f failure) error { - fmter, req := reqFromFailure(ctx, f) - return issues.Post(ctx, fmter, req) - } - - ctx := context.Background() - if err := listFailures(ctx, os.Stdin, fileIssue); err != nil { - log.Println(err) // keep going - } -} - -type failure struct { - title string - packageName string - testName string - testMessage string -} - -// This struct is described in the test2json documentation. -// https://golang.org/cmd/test2json/ -type testEvent struct { - Action string - Package string - Test string - Output string - Time time.Time // encodes as an RFC3339-format string - Elapsed float64 // seconds -} - -type scopedTest struct { - pkg string - name string -} - -func scoped(te testEvent) scopedTest { - if te.Package == "" { - return scopedTest{pkg: mustPkgFromEnv(), name: te.Test} - } - return scopedTest{pkg: te.Package, name: te.Test} -} - -func mustPkgFromEnv() string { - packageName := os.Getenv(pkgEnv) - if packageName == "" { - panic(errors.Errorf("package name environment variable %s is not set", pkgEnv)) - } - return packageName -} - -func maybeEnv(envKey, defaultValue string) string { - v := os.Getenv(envKey) - if v == "" { - return defaultValue - } - return v -} - -func shortPkg() string { - packageName := maybeEnv(pkgEnv, "unknown") - return trimPkg(packageName) -} - -func trimPkg(pkg string) string { - return strings.TrimPrefix(pkg, issues.CockroachPkgPrefix) -} - -func listFailures( - ctx context.Context, input io.Reader, fileIssue func(context.Context, failure) 
error, -) error { - // Tests that took less than this are not even considered for slow test - // reporting. This is so that we protect against large number of - // programmatically-generated subtests. - const shortTestFilterSecs float64 = 0.5 - var timeoutMsg = "panic: test timed out after" - - var packageOutput bytes.Buffer - - // map from test name to list of events (each log line is an event, plus - // start and pass/fail events). - // Tests/events are "outstanding" until we see a final pass/fail event. - // Because of the way the go test runner prints output, in case a subtest times - // out or panics, we don't get a pass/fail event for sibling and ancestor - // tests. Those tests will remain "outstanding" and will be ignored for the - // purpose of issue reporting. - outstandingOutput := make(map[scopedTest][]testEvent) - failures := make(map[scopedTest][]testEvent) - var slowPassEvents []testEvent - var slowFailEvents []testEvent - - // init is true for the preamble of the input before the first "run" test - // event. - init := true - // trustTimestamps will be set if we don't find a marker suggesting that the - // input comes from a stress run. In that case, stress prints all its output - // at once (for a captured failed test run), so the test2json timestamps are - // meaningless. - trustTimestamps := true - // elapsedTotalSec accumulates the time spent in all tests, passing or - // failing. In case the input comes from a stress run, this will be used to - // deduce the duration of a timed out test. - var elapsedTotalSec float64 - // Will be set if the last test timed out. - var timedOutCulprit scopedTest - var timedOutEvent testEvent - var curTestStart time.Time - var last scopedTest - var lastEvent testEvent - scanner := bufio.NewScanner(input) - for scanner.Scan() { - var te testEvent - { - line := scanner.Text() // has no EOL marker - if len(line) <= 2 || line[0] != '{' || line[len(line)-1] != '}' { - // This line is not test2json output, skip it. 
This can happen if - // whatever feeds our input has some extra non-JSON lines such as - // would happen with `make` invocations. - continue - } - if err := json.Unmarshal([]byte(line), &te); err != nil { - return errors.Wrapf(err, "unable to parse %q", line) - } - } - lastEvent = te - - if te.Test != "" { - init = false - } - if init && strings.Contains(te.Output, "-exec 'stress '") { - trustTimestamps = false - } - if timedOutCulprit.name == "" && te.Elapsed > 0 { - // We don't count subtests as those are counted in the parent. - if split := strings.SplitN(te.Test, "/", 2); len(split) == 1 { - elapsedTotalSec += te.Elapsed - } - } - - if timedOutCulprit.name == te.Test && te.Elapsed != 0 { - te.Elapsed = timedOutEvent.Elapsed - } - - // Events for the overall package test do not set Test. - if len(te.Test) > 0 { - switch te.Action { - case "run": - last = scoped(te) - if trustTimestamps { - curTestStart = te.Time - } - case "output": - key := scoped(te) - outstandingOutput[key] = append(outstandingOutput[key], te) - if strings.Contains(te.Output, timeoutMsg) { - timedOutCulprit = key - - // Fill in the Elapsed field for a timeout event. - // As of go1.11, the Elapsed field is bogus for fail events for timed - // out tests, so we do our own computation. - // See https://github.com/golang/go/issues/27568 - // - // Also, if the input is coming from stress, there will not even be a - // fail event for the test, so the Elapsed field computed here will be - // useful. - if trustTimestamps { - te.Elapsed = te.Time.Sub(curTestStart).Seconds() - } else { - // If we don't trust the timestamps, then we compute the test's - // duration by subtracting all the durations that we've seen so far - // (which we do trust to some extent). Note that this is not - // entirely accurate, since there's no information about the - // duration about sibling subtests which may have run. 
And further - // note that it doesn't work well at all for small timeouts because - // the resolution that the test durations have is just tens of - // milliseconds, so many quick tests are rounded of to a duration of - // 0. - re := regexp.MustCompile(`panic: test timed out after (\d*(?:\.\d*)?)(.)`) - matches := re.FindStringSubmatch(te.Output) - if matches == nil { - log.Printf("failed to parse timeout message: %s", te.Output) - te.Elapsed = -1 - } else { - dur, err := strconv.ParseFloat(matches[1], 64) - if err != nil { - log.Fatal(err) - } - if matches[2] == "m" { - // minutes to seconds - dur *= 60 - } else if matches[2] != "s" { - log.Fatalf("unexpected time unit in: %s", te.Output) - } - te.Elapsed = dur - elapsedTotalSec - } - } - timedOutEvent = te - } - case "pass", "skip": - if timedOutCulprit.name != "" { - // NB: we used to do this: - // panic(fmt.Sprintf("detected test timeout but test seems to have passed (%+v)", te)) - // but it would get hit. There is no good way to - // blame a timeout on a particular test. We should probably remove this - // logic in the first place, but for now make sure we don't panic as - // a result of it. - timedOutCulprit = scopedTest{} - } - delete(outstandingOutput, scoped(te)) - if te.Elapsed > shortTestFilterSecs { - // We ignore subtests; their time contributes to the parent's. - if !strings.Contains(te.Test, "/") { - slowPassEvents = append(slowPassEvents, te) - } - } - case "fail": - key := scoped(te) - // Record slow tests. We ignore subtests; their time contributes to the - // parent's. Except the timed out (sub)test, for which the parent (if - // any) is not going to appear in the report because there's not going - // to be a pass/fail event for it. - if !strings.Contains(te.Test, "/") || timedOutCulprit == key { - slowFailEvents = append(slowFailEvents, te) - } - // Move the test to the failures collection unless the test timed out. - // We have special reporting for timeouts below. 
- if timedOutCulprit != key { - failures[key] = outstandingOutput[key] - } - delete(outstandingOutput, key) - } - } else if te.Action == "output" { - // Output was outside the context of a test. This consists mostly of the - // preamble and epilogue that Make outputs, but also any log messages that - // are printed by a test binary's main function. - packageOutput.WriteString(te.Output) - } - } - - // On timeout, we might or might not have gotten a fail event for the timed - // out test (we seem to get one when processing output from a test binary run, - // but not when processing the output of `stress`, which adds some lines at - // the end). If we haven't gotten a fail event, the test's output is still - // outstanding and the test is not registered in the slowFailEvents - // collection. The timeout handling code below relies on slowFailEvents not - // being empty though, so we'll process the test here. - if timedOutCulprit.name != "" { - if _, ok := outstandingOutput[timedOutCulprit]; ok { - slowFailEvents = append(slowFailEvents, timedOutEvent) - delete(outstandingOutput, timedOutCulprit) - } - } else { - // If we haven't received a final event for the last test, then a - // panic/log.Fatal must have happened. Consider it failed. - // Note that because of https://github.com/golang/go/issues/27582 there - // might be other outstanding tests; we ignore those. - if _, ok := outstandingOutput[last]; ok { - log.Printf("found outstanding output. Considering last test failed: %s", last) - failures[last] = outstandingOutput[last] - } - } - - // test2json always puts a fail event last unless it sees a big pass message - // from the test output. - if lastEvent.Action == "fail" && len(failures) == 0 && timedOutCulprit.name == "" { - // If we couldn't find a failing Go test, assume that a failure occurred - // before running Go and post an issue about that. 
- err := fileIssue(ctx, failure{ - title: fmt.Sprintf("%s: package failed", shortPkg()), - packageName: maybeEnv(pkgEnv, "unknown"), - testName: unknown, - testMessage: packageOutput.String(), - }) - if err != nil { - return errors.Wrap(err, "failed to post issue") - } - } else { - for test, testEvents := range failures { - if split := strings.SplitN(test.name, "/", 2); len(split) == 2 { - parentTest, subTest := scopedTest{pkg: test.pkg, name: split[0]}, scopedTest{pkg: test.pkg, name: split[1]} - log.Printf("consolidating failed subtest %q into parent test %q", subTest.name, parentTest.name) - failures[parentTest] = append(failures[parentTest], testEvents...) - delete(failures, test) - } else { - log.Printf("failed parent test %q (no subtests)", test.name) - if _, ok := failures[test]; !ok { - return errors.AssertionFailedf("expected %q in 'failures'", test.name) - } - } - } - // Sort the failed tests to make the unit tests for this script deterministic. - var failedTestNames []scopedTest - for name := range failures { - failedTestNames = append(failedTestNames, name) - } - sort.Slice(failedTestNames, func(i, j int) bool { - return fmt.Sprint(failedTestNames[i]) < fmt.Sprint(failedTestNames[j]) - }) - for _, test := range failedTestNames { - testEvents := failures[test] - var outputs []string - for _, testEvent := range testEvents { - outputs = append(outputs, testEvent.Output) - } - err := fileIssue(ctx, failure{ - title: fmt.Sprintf("%s: %s failed", trimPkg(test.pkg), test.name), - packageName: test.pkg, - testName: test.name, - testMessage: strings.Join(outputs, ""), - }) - if err != nil { - return errors.Wrap(err, "failed to post issue") - } - } - } - - // Sort slow tests descendingly by duration. 
- sort.Slice(slowPassEvents, func(i, j int) bool { - return slowPassEvents[i].Elapsed > slowPassEvents[j].Elapsed - }) - sort.Slice(slowFailEvents, func(i, j int) bool { - return slowFailEvents[i].Elapsed > slowFailEvents[j].Elapsed - }) - - report := genSlowTestsReport(slowPassEvents, slowFailEvents) - if err := writeSlowTestsReport(report); err != nil { - log.Printf("failed to create slow tests report: %s", err) - } - - // If the run timed out, file an issue. A couple of cases: - // 1) If the test that was running when the package timed out is the longest - // test, then we blame it. The common case is the test deadlocking - it would - // have run forever. - // 2) Otherwise, we don't blame anybody in particular. We file a generic issue - // listing the package name containing the report of long-running tests. - if timedOutCulprit.name != "" { - slowest := slowFailEvents[0] - if len(slowPassEvents) > 0 && slowPassEvents[0].Elapsed > slowest.Elapsed { - slowest = slowPassEvents[0] - } - - culpritOwner := fmt.Sprintf("%v", getOwner(ctx, timedOutCulprit.pkg, timedOutCulprit.name)) - slowEvents := append(slowFailEvents, slowPassEvents...) - // The predicate determines if the union of the slow events is owned by the _same_ team(s) as timedOutCulprit. - hasSameOwner := func() bool { - for _, slowEvent := range slowEvents { - scopedEvent := scoped(slowEvent) - owner := fmt.Sprintf("%v", getOwner(ctx, scopedEvent.pkg, scopedEvent.name)) - - if culpritOwner != owner { - log.Printf("%v has a different owner: %s;bailing out...\n", scopedEvent, owner) - return false - } - } - return true - } - if timedOutCulprit == scoped(slowest) || hasSameOwner() { - // The test that was running when the timeout hit is either the one that ran for - // the longest time or all other tests share the same owner. 
- log.Printf("timeout culprit found: %s\n", timedOutCulprit.name) - err := fileIssue(ctx, failure{ - title: fmt.Sprintf("%s: %s timed out", trimPkg(timedOutCulprit.pkg), timedOutCulprit.name), - packageName: timedOutCulprit.pkg, - testName: timedOutCulprit.name, - testMessage: report, - }) - if err != nil { - return errors.Wrap(err, "failed to post issue") - } - } else { - log.Printf("timeout culprit not found\n") - // TODO(irfansharif): These are assigned to nobody given our lack of - // a story around #51653. It'd be nice to be able to go from pkg - // name to team-name, and be able to assign to a specific team. - err := fileIssue(ctx, failure{ - title: fmt.Sprintf("%s: package timed out", shortPkg()), - packageName: maybeEnv(pkgEnv, "unknown"), - testName: unknown, - testMessage: report, - }) - if err != nil { - return errors.Wrap(err, "failed to post issue") - } - } - } - - return nil -} - -func genSlowTestsReport(slowPassingTests, slowFailingTests []testEvent) string { - var b strings.Builder - b.WriteString("Slow failing tests:\n") - for i, te := range slowFailingTests { - if i == 20 { - break - } - fmt.Fprintf(&b, "%s - %.2fs\n", te.Test, te.Elapsed) - } - if len(slowFailingTests) == 0 { - fmt.Fprint(&b, "\n") - } - - b.WriteString("\nSlow passing tests:\n") - for i, te := range slowPassingTests { - if i == 20 { - break - } - fmt.Fprintf(&b, "%s - %.2fs\n", te.Test, te.Elapsed) - } - if len(slowPassingTests) == 0 { - fmt.Fprint(&b, "\n") - } - return b.String() -} - -func writeSlowTestsReport(report string) error { - return os.WriteFile("artifacts/slow-tests-report.txt", []byte(report), 0644) -} - -// getFileLine returns the file (relative to repo root) and line for the given test. -// The package name is assumed relative to the repo root as well, i.e. pkg/foo/bar. 
-func getFileLine( - ctx context.Context, packageName, testName string, -) (_filename string, _linenum string, _ error) { - // Search the source code for the email address of the last committer to touch - // the first line of the source code that contains testName. Then, ask GitHub - // for the GitHub username of the user with that email address by searching - // commits in cockroachdb/cockroach for commits authored by the address. - subtests := strings.Split(testName, "/") - testName = subtests[0] - packageName = strings.TrimPrefix(packageName, "github.com/cockroachdb/cockroach/") - for { - if !strings.Contains(packageName, "pkg") { - return "", "", errors.Newf("could not find test %s", testName) - } - cmd := exec.Command(`/bin/bash`, `-c`, - fmt.Sprintf(`cd "$(git rev-parse --show-toplevel)" && git grep -n 'func %s(' '%s/*_test.go'`, - testName, packageName)) - // This command returns output such as: - // ../ccl/storageccl/export_test.go:31:func TestExportCmd(t *testing.T) { - out, err := cmd.CombinedOutput() - if err != nil { - fmt.Printf("couldn't find test %s in %s: %s %+v\n", - testName, packageName, string(out), err) - packageName = filepath.Dir(packageName) - continue - } - re := regexp.MustCompile(`(.*):(.*):`) - // The first 2 :-delimited fields are the filename and line number. - matches := re.FindSubmatch(out) - if matches == nil { - fmt.Printf("couldn't find filename/line number for test %s in %s: %s %+v\n", - testName, packageName, string(out), err) - packageName = filepath.Dir(packageName) - continue - } - return string(matches[1]), string(matches[2]), nil - } -} - -// getOwner looks up the file containing the given test and returns -// the owning teams. It does not return -// errors, but instead simply returns what it can. -// In case no owning team is found, "test-eng" team is returned. 
-func getOwner(ctx context.Context, packageName, testName string) (_teams []team.Team) { - filename, _, err := getFileLine(ctx, packageName, testName) - if err != nil { - log.Printf("errror getting file:line for %s.%s: %s", packageName, testName, err) - // Let's continue so that we can assign the "catch-all" owner. - } - co, err := codeowners.DefaultLoadCodeOwners() - if err != nil { - log.Printf("loading codeowners: %s", err) - return nil - } - match := co.Match(filename) - - if match == nil { - // N.B. if no owning team is found, we default to 'test-eng'. This should be a rare exception rather than the rule. - testEng := co.GetTeamForAlias("cockroachdb/test-eng") - if testEng.Name() == "" { - log.Fatalf("test-eng team could not be found in TEAMS.yaml") - } - log.Printf("assigning %s.%s to 'test-eng' as catch-all", packageName, testName) - match = []team.Team{testEng} - } - return match -} - -func formatPebbleMetamorphicIssue( - ctx context.Context, f failure, -) (issues.IssueFormatter, issues.PostRequest) { - var repro string - { - const seedHeader = "===== SEED =====\n" - i := strings.Index(f.testMessage, seedHeader) - if i != -1 { - s := f.testMessage[i+len(seedHeader):] - s = strings.TrimSpace(s) - s = strings.TrimSpace(s[:strings.Index(s, "\n")]) - repro = fmt.Sprintf("go test -mod=vendor -tags 'invariants' -exec 'stress -p 1' "+ - `-timeout 0 -test.v -run TestMeta$ ./internal/metamorphic -seed %s -ops "uniform:5000-10000"`, s) - } - } - return issues.UnitTestFormatter, issues.PostRequest{ - TestName: f.testName, - PackageName: f.packageName, - Message: f.testMessage, - Artifacts: "meta", - HelpCommand: issues.ReproductionCommandFromString(repro), - ExtraLabels: []string{"metamorphic-failure"}, - } + githubpost.Post(*formatterName, os.Stdin) } diff --git a/pkg/cmd/testfilter/BUILD.bazel b/pkg/cmd/testfilter/BUILD.bazel index 095c24d06797..0f8c2ca8ef43 100644 --- a/pkg/cmd/testfilter/BUILD.bazel +++ b/pkg/cmd/testfilter/BUILD.bazel @@ -1,12 +1,12 @@ 
load("//build/bazelutil/unused_checker:unused.bzl", "get_x_data") -load("@io_bazel_rules_go//go:def.bzl", "go_binary", "go_library", "go_test") +load("@io_bazel_rules_go//go:def.bzl", "go_binary", "go_library") go_library( name = "testfilter_lib", srcs = ["main.go"], importpath = "github.com/cockroachdb/cockroach/pkg/cmd/testfilter", visibility = ["//visibility:private"], - deps = ["@com_github_cockroachdb_errors//:errors"], + deps = ["//pkg/cmd/bazci/testfilter"], ) go_binary( @@ -15,18 +15,4 @@ go_binary( visibility = ["//visibility:public"], ) -go_test( - name = "testfilter_test", - size = "small", - srcs = ["main_test.go"], - args = ["-test.timeout=55s"], - data = glob(["testdata/**"]), - embed = [":testfilter_lib"], - deps = [ - "//pkg/testutils", - "//pkg/util/leaktest", - "@com_github_cockroachdb_datadriven//:datadriven", - ], -) - get_x_data(name = "get_x_data") diff --git a/pkg/cmd/testfilter/main.go b/pkg/cmd/testfilter/main.go index eb698c065b51..61da17a99392 100644 --- a/pkg/cmd/testfilter/main.go +++ b/pkg/cmd/testfilter/main.go @@ -36,16 +36,11 @@ package main import ( - "bufio" - "encoding/json" "flag" "fmt" - "io" "os" - "strings" - "time" - "github.com/cockroachdb/errors" + "github.com/cockroachdb/cockroach/pkg/cmd/bazci/testfilter" ) const modeUsage = `strip: @@ -56,374 +51,11 @@ convert: don't perform any filtering, simply convert the json back to original test format' ` -type modeT byte - -const ( - modeStrip modeT = iota - modeOmit - modeConvert -) - -func (m *modeT) Set(s string) error { - switch s { - case "strip": - *m = modeStrip - case "omit": - *m = modeOmit - case "convert": - *m = modeConvert - default: - return errors.New("unsupported mode") - } - return nil -} - -func (m *modeT) String() string { - switch *m { - case modeStrip: - return "strip" - case modeOmit: - return "omit" - case modeConvert: - return "convert" - default: - return "unknown" - } -} - -var modeVar = modeStrip - -func init() { - flag.Var(&modeVar, "mode", modeUsage) 
-} - -type testEvent struct { - Time time.Time // encodes as an RFC3339-format string - Action string - Package string - Test string - Elapsed float64 // seconds - Output string -} - -func (t *testEvent) json() string { - j, err := json.Marshal(t) - if err != nil { - panic(err) - } - return string(j) -} - func main() { + modeFlag := flag.String("mode", "strip", modeUsage) flag.Parse() - if err := filter(os.Stdin, os.Stdout, modeVar); err != nil { + if err := testfilter.FilterAndWrite(os.Stdin, os.Stdout, []string{*modeFlag}); err != nil { fmt.Fprintln(os.Stderr, err) os.Exit(1) } } - -const packageLevelTestName = "PackageLevel" - -// tup identifies a single test. -type tup struct { - pkg string - test string -} - -type ent struct { - first, last string // RUN and (SKIP|PASS|FAIL) - strings.Builder // output - - // The following fields are set for package-level entries. - numActiveTests int // number of tests currently running in package. - numTestsFailed int // number of tests failed so far in package. -} - -func filter(in io.Reader, out io.Writer, mode modeT) error { - scanner := bufio.NewScanner(in) - m := map[tup]*ent{} - ev := &testEvent{} - var n int // number of JSON lines parsed - var passFailLine string // catch common error of piping non-json test output in - for scanner.Scan() { - line := scanner.Text() // has no EOL marker - if len(line) <= 2 || line[0] != '{' || line[len(line)-1] != '}' { - // Not test2json output, pass it through except in `omit` mode. - // It's important that we still see build errors etc when running - // in -mode=strip. 
- if passFailLine == "" && (strings.Contains(line, "PASS") || strings.Contains(line, "FAIL")) { - passFailLine = line - } - if mode != modeOmit { - fmt.Fprintln(out, line) - } - continue - } - *ev = testEvent{} - if err := json.Unmarshal([]byte(line), ev); err != nil { - return err - } - n++ - - if mode == modeConvert { - if ev.Action == "output" { - fmt.Fprint(out, ev.Output) - } - continue - } - - if ev.Test == "" { - // This is a package-level message. - - // Populate a fake test name. We need this because TC is blind - // to anything without a "Test" field. - ev.Test = packageLevelTestName - pkey := tup{ev.Package, ev.Test} - - switch ev.Action { - case "fail": - buf := m[pkey] - - // Is the package failing with some non-terminated tests? - hasOpenSubTests := buf != nil && buf.numActiveTests > 0 - - // Did the package contain any failing tests? - hadSomeFailedTests := buf != nil && buf.numTestsFailed > 0 - - // If the package is failing without non-terminated tests, - // but it contained some failing tests, then we rely on these - // tests' output to explain what happened to the user. In that - // case, we are happy to ignore the package-level output. - // - // (If the package did not have any failing tests at all, we - // still want some package-level output: in that case, if it - // fails we want some details about that below. If there was - // any non-terminating test, we also mandate a package-level - // result, which will contain the list of non-terminating - // tests.) - if !hasOpenSubTests && hadSomeFailedTests { - delete(m, pkey) - continue - } - } - - // At this point, either: - // - we are starting to see output for a package which - // has not yet terminated processing; or - // - we are seeing the last pass/fail entry for a package, - // and there is something "interesting" to report - // for this package. - // - // In both cases, we are going to emit a test result for the - // package itself in the common output processing case below. 
- // For this to be valid/possible, we first need to ensure - // the map contains a package-level entry. - if err := ensurePackageEntry(m, out, ev, pkey, mode); err != nil { - return err - } - - // At this point, either we are still processing a package's - // output before it completes, or the last package event. - - const helpMessage = ` -Check full_output.txt in artifacts for stray panics or other errors that broke -the test process. Note that you might be looking at this message from a parent -CI job. To reliably get to the "correct" job, click the drop-down next to -"PackageLevel" above, then "Show in build log", and then navigate to the -artifacts tab. See: - -https://user-images.githubusercontent.com/5076964/110923167-e2ab4780-8320-11eb-8fba-99da632aa814.png -https://user-images.githubusercontent.com/5076964/110923299-08d0e780-8321-11eb-91af-f4eedcf8bacb.png - -for details. -` - if ev.Action != "output" { - // This is the final event for the package. - // - // Dump all the test scopes so far in this package, then - // forget about them. This ensures that the test scopes are - // closed before the package scope is closed the final output. - var testReport strings.Builder - for key := range m { - if key.pkg == ev.Package && key.test != ev.Test { - // We only mention the test scopes without their sub-tests; - // otherwise we could get tens of thousands of output lines - // for a failed logic test run due to a panic. - if strings.Contains(key.test, "/") { - // Sub-test. Just forget all about it. - delete(m, key) - } else { - // Not a sub-test. - - // Remember the test's name to report in the - // package-level output. - testReport.WriteString("\n" + key.test) - - // Synthetize a "skip" message. We want "something" (and - // not nothing) so that we get some timing information - // in strip mode. 
- // - // We use "skip" and not "fail" to ensure that no issue - // gets filed for the open-ended tests by the github - // auto-poster: we don't have confidence for any of them - // that they are the particular cause of the failure. - syntheticSkipEv := testEvent{ - Time: ev.Time, - Action: "skip", - Package: ev.Package, - Test: key.test, - Elapsed: 0, - Output: "unfinished due to package-level failure" + helpMessage, - } - // Translate the synthetic message back into an output line. - syntheticLine := syntheticSkipEv.json() - if err := processTestEvent(m, out, &syntheticSkipEv, syntheticLine, mode); err != nil { - return err - } - } - } - } - - // If the package is failing, tell the user that something was amiss. - if ev.Action == "fail" { - ev.Output += helpMessage - if testReport.Len() > 0 { - ev.Output += "\nThe following tests have not completed and could be the cause of the failure:" + testReport.String() - } - } - } - - // Re-populate the line from the JSON payload for the - // PackageLevel pseudo-test. - line = ev.json() - } - - // Common output processing. - if err := processTestEvent(m, out, ev, line, mode); err != nil { - return err - } - } - // Some scopes might still be open. To the best of my knowledge, - // this is due to a panic/premature exit of a single-package test - // binary. In that case, it seems that neither is the package scope - // closed, nor the scopes for any tests that were running in - // parallel, so we pass that through if stripping, but not when - // omitting. - if mode == modeStrip { - for key := range m { - buf := m[key] - // Skip over the package-level pseudo-entries. Since we're - // single-package, the remainder of the output is sufficient - // here. - if key.test == packageLevelTestName { - continue - } - fmt.Fprintln(out, buf.String()) - } - } - // TODO(tbg): would like to return an error here for sanity, but the - // JSON just isn't well-formed all the time. 
For example, at the time - // of writing, here's a repro: - // make benchshort PKG=./pkg/bench BENCHES=BenchmarkIndexJoin 2>&1 | \ - // testfilter -mode=strip - // Interestingly it works once we remove the `log.Scope(b).Close` in - // that test. Adding TESTFLAGS=-v doesn't matter apparently. - // if len(m) != 0 { - // return fmt.Errorf("%d tests did not terminate (a package likely exited prematurely)", len(m)) - // } - if mode != modeConvert && n == 0 && passFailLine != "" { - // Without this, if the input to this command wasn't even JSON, we would - // pass. That's a mistake we should avoid at all costs. Note that even - // `go test -run - ./some/pkg` produces n>0 due to the start/pass events - // for the package, so if we're here then 100% something weird is going - // on. - return fmt.Errorf("not a single test was parsed, but detected test output: %s", passFailLine) - } - return nil -} - -// ensurePackageEntry ensures there is a package-level entry in the -// map for each package. -// -// This is necessary because we want to consider package-level -// results as regular test results. To achieve this -// successfully, we need to understand how TC processes tests. -// -// TC is a bit peculiar and requires all tests to *start* with -// an event with action "run", then zero or more "output" -// actions, then one of either "pass", "skip" or "fail". -// -// Unfortunately, `go test` does not emit initial "run" -// entries for package-level outputs. This is arguably a -// bug. Instead it starts directly with either "output" or -// "pass"/"fail" at the end. This prevents TC from treating -// the package as a test. To fix that, We insert a synthetic -// "run" entry for the package in the map here. -func ensurePackageEntry(m map[tup]*ent, out io.Writer, ev *testEvent, pkey tup, mode modeT) error { - if buf := m[pkey]; buf != nil { - return nil - } - // Package not known yet. Synthetize an entry. 
- packageEvent := *ev - packageEvent.Test = packageLevelTestName - packageEvent.Action = "run" - packageEvent.Output = "" - packageLine := packageEvent.json() - return processTestEvent(m, out, &packageEvent, packageLine, mode) -} - -func processTestEvent(m map[tup]*ent, out io.Writer, ev *testEvent, line string, mode modeT) error { - // The package key. - pkey := tup{ev.Package, packageLevelTestName} - // The test's key. - key := tup{ev.Package, ev.Test} - - // Is this a regular test? In that case, ensure there is a - // package-level entry for this test. - if ev.Test != packageLevelTestName { - if err := ensurePackageEntry(m, out, ev, pkey, mode); err != nil { - return err - } - } - - // Now process the test itself. - buf := m[key] - if buf == nil { - buf = &ent{first: line} - m[key] = buf - if key != pkey { - // Remember how many tests we're seeing. - m[pkey].numActiveTests++ - } - } - if _, err := fmt.Fprintln(buf, line); err != nil { - return err - } - switch ev.Action { - case "pass", "skip", "fail": - buf.last = line - if ev.Action == "fail" { - fmt.Fprint(out, buf.String()) - } else if mode == modeStrip { - // Output only the start and end of test so that we preserve the - // timing information. However, the output is omitted. - fmt.Fprintln(out, buf.first) - fmt.Fprintln(out, buf.last) - } - - // Forget the test. - delete(m, key) - if key != pkey { - m[pkey].numActiveTests-- - if ev.Action == "fail" { - m[pkey].numTestsFailed++ - } - } - - case "run", "pause", "cont", "bench", "output": - default: - // We must have parsed some JSON that wasn't a testData. 
- return fmt.Errorf("unknown input: %s", line) - } - return nil -} diff --git a/pkg/server/BUILD.bazel b/pkg/server/BUILD.bazel index 94aac3f80288..9bbfbdb2a58f 100644 --- a/pkg/server/BUILD.bazel +++ b/pkg/server/BUILD.bazel @@ -257,6 +257,7 @@ go_library( "//pkg/util/pprofutil", "//pkg/util/protoutil", "//pkg/util/quotapool", + "//pkg/util/rangedesciter", "//pkg/util/retry", "//pkg/util/schedulerlatency", "//pkg/util/stop", diff --git a/pkg/server/server_sql.go b/pkg/server/server_sql.go index 2a3d5edd51d1..f787fc18cd6f 100644 --- a/pkg/server/server_sql.go +++ b/pkg/server/server_sql.go @@ -111,6 +111,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/util/mon" "github.com/cockroachdb/cockroach/pkg/util/netutil" "github.com/cockroachdb/cockroach/pkg/util/netutil/addr" + "github.com/cockroachdb/cockroach/pkg/util/rangedesciter" "github.com/cockroachdb/cockroach/pkg/util/stop" "github.com/cockroachdb/cockroach/pkg/util/syncutil" "github.com/cockroachdb/cockroach/pkg/util/timeutil" @@ -1004,9 +1005,10 @@ func newSQLServer(ctx context.Context, cfg sqlServerArgs) (*SQLServer, error) { var systemDeps upgrade.SystemDeps if codec.ForSystemTenant() { c = upgradecluster.New(upgradecluster.ClusterConfig{ - NodeLiveness: nodeLiveness, - Dialer: cfg.nodeDialer, - DB: cfg.db, + NodeLiveness: nodeLiveness, + Dialer: cfg.nodeDialer, + RangeDescIterator: rangedesciter.New(cfg.db), + DB: cfg.db, }) systemDeps = upgrade.SystemDeps{ Cluster: c, diff --git a/pkg/storage/pebble_file_registry.go b/pkg/storage/pebble_file_registry.go index 708e48c63e21..89f26a7ba936 100644 --- a/pkg/storage/pebble_file_registry.go +++ b/pkg/storage/pebble_file_registry.go @@ -532,6 +532,19 @@ func (r *PebbleFileRegistry) getRegistryCopy() *enginepb.FileRegistry { return rv } +// List returns a mapping of file in the registry to their enginepb.FileEntry. 
+func (r *PebbleFileRegistry) List() map[string]*enginepb.FileEntry { + r.mu.Lock() + defer r.mu.Unlock() + // Perform a defensive deep-copy of the internal map here, as there may be + // modifications to it after it has been returned to the caller. + m := make(map[string]*enginepb.FileEntry, len(r.mu.entries)) + for k, v := range r.mu.entries { + m[k] = v + } + return m +} + // Close closes the record writer and record file used for the registry. // It should be called when a Pebble instance is closed. func (r *PebbleFileRegistry) Close() error { diff --git a/pkg/storage/pebble_file_registry_test.go b/pkg/storage/pebble_file_registry_test.go index b2e6ca9aec20..33d79c35de3c 100644 --- a/pkg/storage/pebble_file_registry_test.go +++ b/pkg/storage/pebble_file_registry_test.go @@ -16,6 +16,7 @@ import ( "io" "os" "runtime/debug" + "sort" "strings" "testing" @@ -383,6 +384,29 @@ func TestFileRegistry(t *testing.T) { require.NoError(t, f.Close()) } return buf.String() + case "list": + type fileEntry struct { + name string + entry *enginepb.FileEntry + } + var fileEntries []fileEntry + for name, entry := range registry.List() { + fileEntries = append(fileEntries, fileEntry{ + name: name, + entry: entry, + }) + } + sort.Slice(fileEntries, func(i, j int) bool { + return fileEntries[i].name < fileEntries[j].name + }) + var b bytes.Buffer + for _, fe := range fileEntries { + b.WriteString(fmt.Sprintf( + "name=%s,type=%s,settings=%s\n", + fe.name, fe.entry.EnvType.String(), string(fe.entry.EncryptionSettings), + )) + } + return b.String() default: panic("unrecognized command " + d.Cmd) } diff --git a/pkg/storage/testdata/file_registry b/pkg/storage/testdata/file_registry index 71938ac1587a..a1bced6319e8 100644 --- a/pkg/storage/testdata/file_registry +++ b/pkg/storage/testdata/file_registry @@ -9,6 +9,9 @@ load ---- open-dir("") +list +---- + close ---- close("") @@ -24,6 +27,9 @@ load ---- open-dir("") +list +---- + set filename=foo settings=bar ---- 
create("COCKROACHDB_REGISTRY_000001") @@ -35,6 +41,10 @@ sync("") write("COCKROACHDB_REGISTRY_000001", <...23 bytes...>) sync("COCKROACHDB_REGISTRY_000001") +list +---- +name=foo,type=Data,settings=bar + close ---- write("COCKROACHDB_REGISTRY_000001", <...0 bytes...>) @@ -65,6 +75,9 @@ remove("COCKROACHDB_REGISTRY_000001") write("COCKROACHDB_REGISTRY_000002", <...14 bytes...>) sync("COCKROACHDB_REGISTRY_000002") +list +---- + close ---- write("COCKROACHDB_REGISTRY_000002", <...0 bytes...>) @@ -85,6 +98,9 @@ open-dir("") open("COCKROACHDB_REGISTRY_000002") close("COCKROACHDB_REGISTRY_000002") +list +---- + set filename=foo settings=bar ---- create("COCKROACHDB_REGISTRY_000003") @@ -98,6 +114,10 @@ remove("COCKROACHDB_REGISTRY_000002") write("COCKROACHDB_REGISTRY_000003", <...23 bytes...>) sync("COCKROACHDB_REGISTRY_000003") +list +---- +name=foo,type=Data,settings=bar + get filename=foo ---- bar @@ -138,6 +158,9 @@ remove("COCKROACHDB_REGISTRY_000003") write("COCKROACHDB_REGISTRY_000004", <...14 bytes...>) sync("COCKROACHDB_REGISTRY_000004") +list +---- + get filename=foo ---- @@ -160,6 +183,9 @@ load ---- open-dir("") +list +---- + set filename=foo settings=helloworld ---- create("COCKROACHDB_REGISTRY_000001") @@ -179,6 +205,11 @@ set filename=bar settings=hi write("COCKROACHDB_REGISTRY_000001", <...22 bytes...>) sync("COCKROACHDB_REGISTRY_000001") +list +---- +name=bar,type=Data,settings=hi +name=foo,type=Data,settings=helloworld + close ---- write("COCKROACHDB_REGISTRY_000001", <...0 bytes...>) @@ -209,6 +240,11 @@ close("COCKROACHDB_REGISTRY_000001") stat("bar") stat("foo") +list +---- +name=bar,type=Data,settings=hi +name=foo,type=Data,settings=helloworld + get filename=bar ---- hi @@ -226,6 +262,12 @@ remove("COCKROACHDB_REGISTRY_000001") write("COCKROACHDB_REGISTRY_000002", <...25 bytes...>) sync("COCKROACHDB_REGISTRY_000002") +list +---- +name=bar,type=Data,settings=hi +name=bax,type=Data,settings=hello +name=foo,type=Data,settings=helloworld + close ---- 
write("COCKROACHDB_REGISTRY_000002", <...0 bytes...>) diff --git a/pkg/upgrade/system_upgrade.go b/pkg/upgrade/system_upgrade.go index 1e50e1310d87..c6cde37755f4 100644 --- a/pkg/upgrade/system_upgrade.go +++ b/pkg/upgrade/system_upgrade.go @@ -94,30 +94,6 @@ type Cluster interface { // just be the `Migrate` request, with code added within [1] to do the // specific things intended for the specified version. // - // It's important to note that the closure is being executed in the context of - // a distributed transaction that may be automatically retried. So something - // like the following is an anti-pattern: - // - // processed := 0 - // _ = h.IterateRangeDescriptors(..., - // func(descriptors ...roachpb.RangeDescriptor) error { - // processed += len(descriptors) // we'll over count if retried - // log.Infof(ctx, "processed %d ranges", processed) - // }, - // ) - // - // Instead we allow callers to pass in a callback to signal on every attempt - // (including the first). This lets us salvage the example above: - // - // var processed int - // init := func() { processed = 0 } - // _ = h.IterateRangeDescriptors(..., init, - // func(descriptors ...roachpb.RangeDescriptor) error { - // processed += len(descriptors) - // log.Infof(ctx, "processed %d ranges", processed) - // }, - // ) - // // [1]: pkg/kv/kvserver/batch_eval/cmd_migrate.go IterateRangeDescriptors( ctx context.Context, diff --git a/pkg/upgrade/tenant_upgrade.go b/pkg/upgrade/tenant_upgrade.go index ad9045314032..773809d63f72 100644 --- a/pkg/upgrade/tenant_upgrade.go +++ b/pkg/upgrade/tenant_upgrade.go @@ -35,7 +35,6 @@ type TenantDeps struct { DB *kv.DB Codec keys.SQLCodec Settings *cluster.Settings - CollectionFactory *descs.CollectionFactory InternalExecutorFactory descs.TxnManager LeaseManager *lease.Manager JobRegistry *jobs.Registry diff --git a/pkg/upgrade/upgradecluster/BUILD.bazel b/pkg/upgrade/upgradecluster/BUILD.bazel index aa230dc64ab4..ed9ddb606aee 100644 --- 
a/pkg/upgrade/upgradecluster/BUILD.bazel +++ b/pkg/upgrade/upgradecluster/BUILD.bazel @@ -11,7 +11,6 @@ go_library( importpath = "github.com/cockroachdb/cockroach/pkg/upgrade/upgradecluster", visibility = ["//visibility:public"], deps = [ - "//pkg/keys", "//pkg/kv", "//pkg/kv/kvserver/liveness/livenesspb", "//pkg/roachpb", @@ -20,6 +19,7 @@ go_library( "//pkg/util/ctxgroup", "//pkg/util/log", "//pkg/util/quotapool", + "//pkg/util/rangedesciter", "@com_github_cockroachdb_errors//:errors", "@com_github_cockroachdb_redact//:redact", "@org_golang_google_grpc//:go_default_library", @@ -30,7 +30,6 @@ go_test( name = "upgradecluster_test", size = "small", srcs = [ - "client_test.go", "helper_test.go", "main_test.go", "nodes_test.go", @@ -38,15 +37,12 @@ go_test( args = ["-test.timeout=55s"], embed = [":upgradecluster"], deps = [ - "//pkg/keys", - "//pkg/kv/kvserver", "//pkg/roachpb", "//pkg/rpc", "//pkg/security/securityassets", "//pkg/security/securitytest", "//pkg/server", "//pkg/server/serverpb", - "//pkg/sql/tests", "//pkg/testutils", "//pkg/testutils/serverutils", "//pkg/testutils/testcluster", diff --git a/pkg/upgrade/upgradecluster/cluster.go b/pkg/upgrade/upgradecluster/cluster.go index 7edc8a3039ba..d1630a92b197 100644 --- a/pkg/upgrade/upgradecluster/cluster.go +++ b/pkg/upgrade/upgradecluster/cluster.go @@ -14,7 +14,6 @@ package upgradecluster import ( "context" - "github.com/cockroachdb/cockroach/pkg/keys" "github.com/cockroachdb/cockroach/pkg/kv" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/liveness/livenesspb" "github.com/cockroachdb/cockroach/pkg/roachpb" @@ -23,7 +22,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/util/ctxgroup" "github.com/cockroachdb/cockroach/pkg/util/log" "github.com/cockroachdb/cockroach/pkg/util/quotapool" - "github.com/cockroachdb/errors" + "github.com/cockroachdb/cockroach/pkg/util/rangedesciter" "github.com/cockroachdb/redact" "google.golang.org/grpc" ) @@ -42,6 +41,9 @@ type ClusterConfig struct { // Dialer constructs 
connections to other nodes. Dialer NodeDialer + // RangeDescIterator iterates through all range descriptors. + RangeDescIterator rangedesciter.Iterator + // DB provides access the kv.DB instance backing the cluster. // // TODO(irfansharif): We could hide the kv.DB instance behind an interface @@ -143,50 +145,5 @@ func (c *Cluster) ForEveryNode( func (c *Cluster) IterateRangeDescriptors( ctx context.Context, blockSize int, init func(), fn func(...roachpb.RangeDescriptor) error, ) error { - return c.c.DB.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error { - // Inform the caller that we're starting a fresh attempt to page in - // range descriptors. - init() - - // Iterate through meta{1,2} to pull out all the range descriptors. - var lastRangeIDInMeta1 roachpb.RangeID - return txn.Iterate(ctx, keys.MetaMin, keys.MetaMax, blockSize, - func(rows []kv.KeyValue) error { - descriptors := make([]roachpb.RangeDescriptor, 0, len(rows)) - var desc roachpb.RangeDescriptor - for _, row := range rows { - err := row.ValueProto(&desc) - if err != nil { - return errors.Wrapf(err, "unable to unmarshal range descriptor from %s", row.Key) - } - - // In small enough clusters it's possible for the same range - // descriptor to be stored in both meta1 and meta2. This - // happens when some range spans both the meta and the user - // keyspace. Consider when r1 is [/Min, - // /System/NodeLiveness); we'll store the range descriptor - // in both /Meta2/ and in /Meta1/KeyMax[1]. - // - // As part of iterator we'll de-duplicate this descriptor - // away by checking whether we've seen it before in meta1. - // Since we're scanning over the meta range in sorted - // order, it's enough to check against the last range - // descriptor we've seen in meta1. - // - // [1]: See kvserver.rangeAddressing. 
- if desc.RangeID == lastRangeIDInMeta1 { - continue - } - - descriptors = append(descriptors, desc) - if keys.InMeta1(keys.RangeMetaKey(desc.StartKey)) { - lastRangeIDInMeta1 = desc.RangeID - } - } - - // Invoke fn with the current chunk (of size ~blockSize) of - // range descriptors. - return fn(descriptors...) - }) - }) + return c.c.RangeDescIterator.Iterate(ctx, blockSize, init, fn) } diff --git a/pkg/upgrade/upgradejob/upgrade_job.go b/pkg/upgrade/upgradejob/upgrade_job.go index bbf8291a06f1..844d4ecbcfe9 100644 --- a/pkg/upgrade/upgradejob/upgrade_job.go +++ b/pkg/upgrade/upgradejob/upgrade_job.go @@ -92,7 +92,6 @@ func (r resumer) Resume(ctx context.Context, execCtxI interface{}) error { DB: execCtx.ExecCfg().DB, Codec: execCtx.ExecCfg().Codec, Settings: execCtx.ExecCfg().Settings, - CollectionFactory: execCtx.ExecCfg().CollectionFactory, InternalExecutorFactory: execCtx.ExecCfg().InternalExecutorFactory, LeaseManager: execCtx.ExecCfg().LeaseManager, InternalExecutor: execCtx.ExecCfg().InternalExecutor, diff --git a/pkg/util/rangedesciter/BUILD.bazel b/pkg/util/rangedesciter/BUILD.bazel new file mode 100644 index 000000000000..aed75899a35f --- /dev/null +++ b/pkg/util/rangedesciter/BUILD.bazel @@ -0,0 +1,39 @@ +load("//build/bazelutil/unused_checker:unused.bzl", "get_x_data") +load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") + +go_library( + name = "rangedesciter", + srcs = ["rangedesciter.go"], + importpath = "github.com/cockroachdb/cockroach/pkg/util/rangedesciter", + visibility = ["//visibility:public"], + deps = [ + "//pkg/keys", + "//pkg/kv", + "//pkg/roachpb", + "@com_github_cockroachdb_errors//:errors", + ], +) + +go_test( + name = "rangedesciter_test", + srcs = [ + "main_test.go", + "rangedesciter_test.go", + ], + args = ["-test.timeout=295s"], + deps = [ + ":rangedesciter", + "//pkg/keys", + "//pkg/kv/kvserver", + "//pkg/roachpb", + "//pkg/security/securityassets", + "//pkg/security/securitytest", + "//pkg/server", + 
"//pkg/sql/tests", + "//pkg/testutils/serverutils", + "//pkg/testutils/testcluster", + "//pkg/util/leaktest", + ], +) + +get_x_data(name = "get_x_data") diff --git a/pkg/util/rangedesciter/main_test.go b/pkg/util/rangedesciter/main_test.go new file mode 100644 index 000000000000..9770c518a095 --- /dev/null +++ b/pkg/util/rangedesciter/main_test.go @@ -0,0 +1,31 @@ +// Copyright 2022 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package rangedesciter_test + +import ( + "os" + "testing" + + "github.com/cockroachdb/cockroach/pkg/security/securityassets" + "github.com/cockroachdb/cockroach/pkg/security/securitytest" + "github.com/cockroachdb/cockroach/pkg/server" + "github.com/cockroachdb/cockroach/pkg/testutils/serverutils" + "github.com/cockroachdb/cockroach/pkg/testutils/testcluster" +) + +func TestMain(m *testing.M) { + securityassets.SetLoader(securitytest.EmbeddedAssets) + serverutils.InitTestServerFactory(server.TestServerFactory) + serverutils.InitTestClusterFactory(testcluster.TestClusterFactory) + os.Exit(m.Run()) +} + +//go:generate ../../util/leaktest/add-leaktest.sh *_test.go diff --git a/pkg/util/rangedesciter/rangedesciter.go b/pkg/util/rangedesciter/rangedesciter.go new file mode 100644 index 000000000000..315516f04d72 --- /dev/null +++ b/pkg/util/rangedesciter/rangedesciter.go @@ -0,0 +1,125 @@ +// Copyright 2022 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. 
+// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package rangedesciter + +import ( + "context" + + "github.com/cockroachdb/cockroach/pkg/keys" + "github.com/cockroachdb/cockroach/pkg/kv" + "github.com/cockroachdb/cockroach/pkg/roachpb" + "github.com/cockroachdb/errors" +) + +// Iterator paginates through every range descriptor in the system. +type Iterator interface { + // Iterate paginates through range descriptors in the system using the given + // page size. It's important to note that the closure is being executed in + // the context of a distributed transaction that may be automatically + // retried. So something like the following is an anti-pattern: + // + // processed := 0 + // _ = rdi.Iterate(..., + // func(descriptors ...roachpb.RangeDescriptor) error { + // processed += len(descriptors) // we'll over count if retried + // log.Infof(ctx, "processed %d ranges", processed) + // }, + // ) + // + // Instead we allow callers to pass in a callback to signal on every attempt + // (including the first). This lets us salvage the example above: + // + // var processed int + // init := func() { processed = 0 } + // _ = rdi.Iterate(..., init, + // func(descriptors ...roachpb.RangeDescriptor) error { + // processed += len(descriptors) + // log.Infof(ctx, "processed %d ranges", processed) + // }, + // ) + Iterate( + ctx context.Context, pageSize int, init func(), + fn func(descriptors ...roachpb.RangeDescriptor) error, + ) error +} + +// DB is a database handle to a CRDB cluster. +type DB interface { + Txn(ctx context.Context, retryable func(context.Context, *kv.Txn) error) error +} + +// iteratorImpl is a concrete (private) implementation of the Iterator +// interface. +type iteratorImpl struct { + db DB +} + +// New returns an Iterator. 
+func New(db DB) Iterator { + return &iteratorImpl{db: db} +} + +var _ Iterator = &iteratorImpl{} + +// Iterate implements the Iterator interface. +func (i *iteratorImpl) Iterate( + ctx context.Context, + pageSize int, + init func(), + fn func(descriptors ...roachpb.RangeDescriptor) error, +) error { + return i.db.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error { + // Inform the caller that we're starting a fresh attempt to page in + // range descriptors. + init() + + // Iterate through meta{1,2} to pull out all the range descriptors. + var lastRangeIDInMeta1 roachpb.RangeID + return txn.Iterate(ctx, keys.MetaMin, keys.MetaMax, pageSize, + func(rows []kv.KeyValue) error { + descriptors := make([]roachpb.RangeDescriptor, 0, len(rows)) + var desc roachpb.RangeDescriptor + for _, row := range rows { + err := row.ValueProto(&desc) + if err != nil { + return errors.Wrapf(err, "unable to unmarshal range descriptor from %s", row.Key) + } + + // In small enough clusters it's possible for the same range + // descriptor to be stored in both meta1 and meta2. This + // happens when some range spans both the meta and the user + // keyspace. Consider when r1 is [/Min, + // /System/NodeLiveness); we'll store the range descriptor + // in both /Meta2/ and in /Meta1/KeyMax[1]. + // + // As part of iterator we'll de-duplicate this descriptor + // away by checking whether we've seen it before in meta1. + // Since we're scanning over the meta range in sorted + // order, it's enough to check against the last range + // descriptor we've seen in meta1. + // + // [1]: See kvserver.rangeAddressing. + if desc.RangeID == lastRangeIDInMeta1 { + continue + } + + descriptors = append(descriptors, desc) + if keys.InMeta1(keys.RangeMetaKey(desc.StartKey)) { + lastRangeIDInMeta1 = desc.RangeID + } + } + + // Invoke fn with the current chunk (of size ~blockSize) of + // range descriptors. + return fn(descriptors...) 
+ }) + }) +} diff --git a/pkg/upgrade/upgradecluster/client_test.go b/pkg/util/rangedesciter/rangedesciter_test.go similarity index 78% rename from pkg/upgrade/upgradecluster/client_test.go rename to pkg/util/rangedesciter/rangedesciter_test.go index 69346044364f..67f79259c109 100644 --- a/pkg/upgrade/upgradecluster/client_test.go +++ b/pkg/util/rangedesciter/rangedesciter_test.go @@ -1,4 +1,4 @@ -// Copyright 2020 The Cockroach Authors. +// Copyright 2022 The Cockroach Authors. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt. @@ -8,7 +8,7 @@ // by the Apache License, Version 2.0, included in the file // licenses/APL.txt. -package upgradecluster_test +package rangedesciter_test import ( "context" @@ -20,17 +20,14 @@ import ( "github.com/cockroachdb/cockroach/pkg/roachpb" "github.com/cockroachdb/cockroach/pkg/sql/tests" "github.com/cockroachdb/cockroach/pkg/testutils/serverutils" - "github.com/cockroachdb/cockroach/pkg/upgrade/nodelivenesstest" - "github.com/cockroachdb/cockroach/pkg/upgrade/upgradecluster" "github.com/cockroachdb/cockroach/pkg/util/leaktest" + "github.com/cockroachdb/cockroach/pkg/util/rangedesciter" ) -func TestClusterIterateRangeDescriptors(t *testing.T) { +func TestIterator(t *testing.T) { defer leaktest.AfterTest(t)() ctx := context.Background() - const numNodes = 1 - for _, splits := range [][]roachpb.Key{ {}, // no splits {keys.Meta2Prefix}, // split between meta1 and meta2 @@ -58,17 +55,11 @@ func TestClusterIterateRangeDescriptors(t *testing.T) { t.Fatal(err) } - c := nodelivenesstest.New(numNodes) - h := upgradecluster.New(upgradecluster.ClusterConfig{ - NodeLiveness: c, - Dialer: upgradecluster.NoopDialer{}, - DB: kvDB, - }) - + iter := rangedesciter.New(kvDB) for _, blockSize := range []int{1, 5, 10, 50} { var numDescs int init := func() { numDescs = 0 } - if err := h.IterateRangeDescriptors(ctx, blockSize, init, func(descriptors ...roachpb.RangeDescriptor) error { + if err 
:= iter.Iterate(ctx, blockSize, init, func(descriptors ...roachpb.RangeDescriptor) error { numDescs += len(descriptors) return nil }); err != nil {