Skip to content

Commit

Permalink
nightlies,logictestccl: probabilistically run backup+restore during l…
Browse files Browse the repository at this point in the history
…ogic tests

This change introduces a nightly script that run the logictests
in logictestccl to run with `COCKROACH_LOGIC_TEST_BACKUP_RESTORE_PROBABILITY`
set to a non-zero value. This environment variable controls the probability of us
running a backup+restore between lines of a logic test. Egs: 1.0 means that
every line of the logic test will be run after a cluster backup and restore.

The motivation for this change is to increase the coverage of backup+restore
testing by piggybacking on our large logictest corpus. To begin with we only
run tests in logictestccl with config local, but this will be gradually expanded
as we work through the failure modes.

Informs: cockroachdb#77130

Release note: None
  • Loading branch information
adityamaru committed Sep 13, 2022
1 parent 203c078 commit e426939
Show file tree
Hide file tree
Showing 9 changed files with 129 additions and 10 deletions.
13 changes: 13 additions & 0 deletions build/teamcity/cockroach/nightlies/sqllogic_backup_restore.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#!/usr/bin/env bash

set -euo pipefail

dir="$(dirname $(dirname $(dirname $(dirname "${0}"))))"

source "$dir/teamcity-support.sh" # For $root
source "$dir/teamcity-bazel-support.sh" # For run_bazel

tc_start_block "Run SQL Logic Test Backup Restore"
BAZEL_SUPPORT_EXTRA_DOCKER_ARGS="-e BUILD_VCS_NUMBER -e GITHUB_API_TOKEN -e GITHUB_REPO -e TC_BUILDTYPE_ID -e TC_BUILD_BRANCH -e TC_BUILD_ID -e TC_SERVER_URL" \
run_bazel build/teamcity/cockroach/nightlies/sqllogic_backup_restore_impl.sh
tc_end_block "Run SQL Logic Test Backup Restore"
32 changes: 32 additions & 0 deletions build/teamcity/cockroach/nightlies/sqllogic_backup_restore_impl.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#!/usr/bin/env bash

set -xeuo pipefail

dir="$(dirname $(dirname $(dirname $(dirname "${0}"))))"
source "$dir/teamcity-bazel-support.sh" # For process_test_json

bazel build //pkg/cmd/bazci //pkg/cmd/github-post //pkg/cmd/testfilter --config=ci
BAZEL_BIN=$(bazel info bazel-bin --config=ci)

ARTIFACTS_DIR=/artifacts
GO_TEST_JSON_OUTPUT_FILE=/artifacts/test.json.txt
exit_status=0

for config in local; do
$BAZEL_BIN/pkg/cmd/bazci/bazci_/bazci test -- --config=ci \
//pkg/ccl/logictestccl/tests/$config/... \
--test_arg=-show-sql \
--test_env=COCKROACH_LOGIC_TEST_BACKUP_RESTORE_PROBABILITY=0.8 \
--test_env=GO_TEST_WRAP_TESTV=1 \
--test_env=GO_TEST_WRAP=1 \
--test_env=GO_TEST_JSON_OUTPUT_FILE=$GO_TEST_JSON_OUTPUT_FILE.$config \
--test_timeout=7200 \
|| exit_status=$?

process_test_json \
$BAZEL_BIN/pkg/cmd/testfilter/testfilter_/testfilter \
$BAZEL_BIN/pkg/cmd/github-post/github-post_/github-post \
$ARTIFACTS_DIR \
$GO_TEST_JSON_OUTPUT_FILE \
$exit_status
done
1 change: 1 addition & 0 deletions pkg/ccl/logictestccl/testdata/logic_test/as_of
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# LogicTest: local
# BackupRestoreProbability: 0.0

statement ok
CREATE TABLE t (
Expand Down
2 changes: 2 additions & 0 deletions pkg/ccl/logictestccl/testdata/logic_test/crdb_internal
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ CREATE table t2 (a STRING PRIMARY KEY) PARTITION BY LIST (a) (

# Since there are no zone configurations on any of these partitions, tables,
# or databases, these partitions inherit directly from the default config.
skipif backup-restore
query IITTITTTII
SELECT * FROM crdb_internal.partitions ORDER BY table_id, index_id, name
----
Expand Down Expand Up @@ -64,6 +65,7 @@ CREATE TABLE t4 (a INT PRIMARY KEY, b INT); CREATE INDEX myt4index ON t4 (b); AL

user testuser

skipif backup-restore
query IT
SELECT zone_id, target FROM crdb_internal.zones ORDER BY 1
----
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -347,6 +347,7 @@ ALTER INDEX t_implicit_idx@t_a_idx PARTITION BY LIST (a) (
PARTITION "one" VALUES IN (1)
)

skipif backup-restore
query TTTTTT colnames
SELECT
indexrelid, indrelid, indkey, indclass, indoption, indcollation
Expand Down
5 changes: 5 additions & 0 deletions pkg/ccl/logictestccl/testdata/logic_test/schema_change_in_txn
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# Backing up and restoring a descriptor will increment the version of the
# descriptor before restoring it so we cannot achieve the expected behaviour in
# this test.
# BackupRestoreProbability: 0.0

# Regression test for a situation involving creating a table in a transaction
# and altering the index when referenced by name.
subtest index_resolution_does_not_lead_to_new_version
Expand Down
31 changes: 22 additions & 9 deletions pkg/sql/logictest/logic.go
Original file line number Diff line number Diff line change
Expand Up @@ -549,6 +549,9 @@ var (
// writes not holding appropriate latches for range key stats update.
useMVCCRangeTombstonesForPointDeletes = util.ConstantWithMetamorphicTestBool(
"logictest-use-mvcc-range-tombstones-for-point-deletes", false)

// BackupRestoreProbability is the environment variable for `3node-backup` config.
backupRestoreProbability = envutil.EnvOrDefaultFloat64("COCKROACH_LOGIC_TEST_BACKUP_RESTORE_PROBABILITY", 0.0)
)

const queryRewritePlaceholderPrefix = "__async_query_rewrite_placeholder"
Expand Down Expand Up @@ -1665,6 +1668,7 @@ func (t *logicTest) shutdownCluster() {

// resetCluster cleans up the current cluster, and creates a fresh one.
func (t *logicTest) resetCluster() {
t.traceStop()
t.shutdownCluster()
if t.serverArgs == nil {
// We expect the server args to be persisted to the test during test
Expand Down Expand Up @@ -2041,9 +2045,10 @@ func (t *logicTest) hasOpenTxns(ctx context.Context) bool {
existingTxnPriority := "NORMAL"
err := user.QueryRow("SHOW TRANSACTION PRIORITY").Scan(&existingTxnPriority)
if err != nil {
// Ignore an error if we are unable to see transaction priority.
// If we are unable to see transaction priority assume we're in the middle
// of an explicit txn.
log.Warningf(ctx, "failed to check txn priority with %v", err)
continue
return true
}
if _, err := user.Exec("SET TRANSACTION PRIORITY NORMAL;"); !testutils.IsError(err, "there is no transaction in progress") {
// Reset the txn priority to what it was before we checked for open txns.
Expand All @@ -2064,11 +2069,6 @@ func (t *logicTest) hasOpenTxns(ctx context.Context) bool {
func (t *logicTest) maybeBackupRestore(
rng *rand.Rand, config logictestbase.TestClusterConfig,
) error {
if config.BackupRestoreProbability != 0 && !config.IsCCLConfig {
return errors.Newf("logic test config %s specifies a backup restore probability but is not CCL",
config.Name)
}

// We decide if we want to take a backup here based on a probability
// specified in the logic test config.
if rng.Float64() > config.BackupRestoreProbability {
Expand All @@ -2090,6 +2090,8 @@ func (t *logicTest) maybeBackupRestore(
t.setUser(oldUser, 0 /* nodeIdxOverride */)
}()

log.Info(context.Background(), "Running cluster backup and restore")

// To restore the same state in for the logic test, we need to restore the
// data and the session state. The session state includes things like session
// variables that are set for every session that is open.
Expand Down Expand Up @@ -2144,7 +2146,7 @@ func (t *logicTest) maybeBackupRestore(
// Perform the backup and restore as root.
t.setUser(username.RootUser, 0 /* nodeIdxOverride */)

if _, err := t.db.Exec(fmt.Sprintf("BACKUP TO '%s'", backupLocation)); err != nil {
if _, err := t.db.Exec(fmt.Sprintf("BACKUP INTO '%s'", backupLocation)); err != nil {
return errors.Wrap(err, "backing up cluster")
}

Expand All @@ -2154,7 +2156,7 @@ func (t *logicTest) maybeBackupRestore(

// Run the restore as root.
t.setUser(username.RootUser, 0 /* nodeIdxOverride */)
if _, err := t.db.Exec(fmt.Sprintf("RESTORE FROM '%s'", backupLocation)); err != nil {
if _, err := t.db.Exec(fmt.Sprintf("RESTORE FROM LATEST IN '%s'", backupLocation)); err != nil {
return errors.Wrap(err, "restoring cluster")
}

Expand Down Expand Up @@ -2867,6 +2869,10 @@ func (t *logicTest) processSubtest(
}
s.SetSkip(fmt.Sprintf("unsupported configuration %s (%s)", configName, issue))
}
case "backup-restore":
if config.BackupRestoreProbability > 0.0 {
s.SetSkip("backup-restore interferes with this check")
}
continue
default:
return errors.Errorf("unimplemented test statement: %s", s.Text())
Expand Down Expand Up @@ -3915,6 +3921,13 @@ func RunLogicTest(
lt.setup(
config, serverArgsCopy, readClusterOptions(t, path), readKnobOptions(t, path), readTenantClusterSettingOverrideArgs(t, path),
)

hasOverride, overriddenBackupRestoreProbability := logictestbase.ReadBackupRestoreProbabilityOverride(t, path)
config.BackupRestoreProbability = backupRestoreProbability
if hasOverride {
config.BackupRestoreProbability = overriddenBackupRestoreProbability
}

lt.runFile(path, config)

progress.total += lt.progress
Expand Down
1 change: 1 addition & 0 deletions pkg/sql/logictest/logictestbase/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ go_library(
"//pkg/clusterversion",
"//pkg/roachpb",
"//pkg/util",
"//pkg/util/log",
],
)

Expand Down
53 changes: 52 additions & 1 deletion pkg/sql/logictest/logictestbase/logictestbase.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ package logictestbase

import (
"bufio"
"context"
"flag"
"fmt"
"io"
Expand All @@ -25,6 +26,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/clusterversion"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/util"
"github.com/cockroachdb/cockroach/pkg/util/log"
)

var (
Expand Down Expand Up @@ -74,7 +76,7 @@ type TestClusterConfig struct {
// localities is set if nodes should be set to a particular locality.
// Nodes are 1-indexed.
Localities map[int]roachpb.Locality
// backupRestoreProbability will periodically backup the cluster and restore
// BackupRestoreProbability will periodically backup the cluster and restore
// it's state to a new cluster at random points during a logic test.
BackupRestoreProbability float64
// disableDeclarativeSchemaChanger will disable the declarative schema changer
Expand Down Expand Up @@ -577,6 +579,55 @@ func (l stdlogger) Logf(format string, args ...interface{}) {
fmt.Printf(format, args...)
}

// ReadBackupRestoreProbabilityOverride reads any LogicTest directive at the
// beginning of a test file. A line that starts with "#
// BackupRestoreProbability:" specifies the probability with which we should run
// a cluster backup + restore between lines of the test file.
//
// Example:
//
// # BackupRestoreProbability: 0.8
//
// If the file doesn't contain a directive, the value of the environment
// variable COCKROACH_LOGIC_TEST_BACKUP_RESTORE_PROBABILITY is used.
func ReadBackupRestoreProbabilityOverride(
t logger, path string,
) (hasOverride bool, probability float64) {
file, err := os.Open(path)
if err != nil {
t.Fatalf("failed open file %s", path)
}
defer file.Close()

s := NewLineScanner(file)
for s.Scan() {
fields := strings.Fields(s.Text())
if len(fields) == 0 {
continue
}
cmd := fields[0]
if !strings.HasPrefix(cmd, "#") {
// Stop at the first line that's not a comment (or empty).
log.Infof(context.Background(), "coming here one %v", fields)
break
}
if len(fields) > 1 && cmd == "#" && fields[1] == "BackupRestoreProbability:" {
log.Infof(context.Background(), "coming here %v", fields)
if len(fields) == 2 {
t.Fatalf("%s: empty LogicTest directive", path)
}
probability, err := strconv.ParseFloat(fields[2:][0], 64)
if err != nil {
t.Fatalf("failed to parse backup+restore probability: %+v", err)
}
return true, probability
}
}
log.Infof(context.Background(), "coming here!!!!")

return false, 0
}

// ReadTestFileConfigs reads any LogicTest directive at the beginning of a
// test file. A line that starts with "# LogicTest:" specifies a list of
// configuration names. The test file is run against each of those
Expand Down

0 comments on commit e426939

Please sign in to comment.