Skip to content

Commit

Permalink
Merge pull request #100124 from cockroachdb/blathers/backport-release…
Browse files Browse the repository at this point in the history
…-23.1-99792

release-23.1: backupccl: fingerprint 15GB restore roachtests
  • Loading branch information
msbutler authored Mar 31, 2023
2 parents fde1d54 + 0a07809 commit 5f85986
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 8 deletions.
1 change: 1 addition & 0 deletions pkg/cmd/roachtest/tests/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,7 @@ go_library(
"//pkg/internal/sqlsmith",
"//pkg/jobs",
"//pkg/jobs/jobspb",
"//pkg/keys",
"//pkg/kv",
"//pkg/kv/kvpb",
"//pkg/multitenant/mtinfopb",
Expand Down
64 changes: 56 additions & 8 deletions pkg/cmd/roachtest/tests/restore.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ import (
"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/test"
"github.com/cockroachdb/cockroach/pkg/jobs"
"github.com/cockroachdb/cockroach/pkg/jobs/jobspb"
"github.com/cockroachdb/cockroach/pkg/keys"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/roachprod/install"
"github.com/cockroachdb/cockroach/pkg/testutils"
"github.com/cockroachdb/cockroach/pkg/testutils/sqlutils"
Expand All @@ -46,9 +48,10 @@ func registerRestoreNodeShutdown(r registry.Registry) {
sp := restoreSpecs{
hardware: makeHardwareSpecs(hardwareSpecs{}),
backup: makeBackupSpecs(
backupSpecs{workload: tpceRestore{customers: 5000},
backupSpecs{workload: tpceRestore{customers: 1000},
version: "v22.2.1"}),
timeout: 1 * time.Hour,
timeout: 1 * time.Hour,
fingerprint: 8445446819555404274,
}

makeRestoreStarter := func(ctx context.Context, t test.Test, c cluster.Cluster, gatewayNode int) jobStarter {
Expand All @@ -75,6 +78,7 @@ func registerRestoreNodeShutdown(r registry.Registry) {
c.Put(ctx, t.Cockroach(), "./cockroach")
c.Start(ctx, t.L(), option.DefaultStartOptsNoBackups(), install.MakeClusterSettings())
jobSurvivesNodeShutdown(ctx, t, c, nodeToShutdown, makeRestoreStarter(ctx, t, c, gatewayNode))
sp.checkFingerprint(ctx)
},
})

Expand All @@ -97,6 +101,7 @@ func registerRestoreNodeShutdown(r registry.Registry) {
c.Start(ctx, t.L(), option.DefaultStartOptsNoBackups(), install.MakeClusterSettings())

jobSurvivesNodeShutdown(ctx, t, c, nodeToShutdown, makeRestoreStarter(ctx, t, c, gatewayNode))
sp.checkFingerprint(ctx)
},
})
}
Expand All @@ -112,10 +117,11 @@ func registerRestore(r registry.Registry) {
withPauseSpecs := restoreSpecs{
hardware: makeHardwareSpecs(hardwareSpecs{}),
backup: makeBackupSpecs(
backupSpecs{workload: tpceRestore{customers: 5000},
backupSpecs{workload: tpceRestore{customers: 1000},
version: "v22.2.1"}),
timeout: 3 * time.Hour,
namePrefix: "pause",
timeout: 3 * time.Hour,
namePrefix: "pause",
fingerprint: 8445446819555404274,
}
withPauseSpecs.initTestName()

Expand Down Expand Up @@ -254,6 +260,7 @@ func registerRestore(r registry.Registry) {
}
}
metricCollector()
withPauseSpecs.checkFingerprint(ctx)
return nil
})
m.Wait()
Expand Down Expand Up @@ -327,6 +334,15 @@ func registerRestore(r registry.Registry) {
timeout: 24 * time.Hour,
tags: []string{"weekly", "aws-weekly"},
},
{
// A teeny weeny 15GB restore that could be used to bisect scale agnostic perf regressions.
hardware: makeHardwareSpecs(hardwareSpecs{}),
backup: makeBackupSpecs(
backupSpecs{workload: tpceRestore{customers: 1000},
version: "v22.2.1"}),
timeout: 3 * time.Hour,
fingerprint: 8445446819555404274,
},
// TODO(msbutler): add the following tests once roachperf/grafana is hooked up and old tests are
// removed:
// - restore/tpce/400GB/nodes=30
Expand Down Expand Up @@ -372,6 +388,7 @@ func registerRestore(r registry.Registry) {
return err
}
metricCollector()
sp.checkFingerprint(ctx)
return nil
})
m.Wait()
Expand Down Expand Up @@ -578,6 +595,8 @@ func (tpce tpceRestore) String() string {
var builder strings.Builder
builder.WriteString("tpce/")
switch tpce.customers {
case 1000:
builder.WriteString("15GB")
case 5000:
builder.WriteString("80GB")
case 25000:
Expand All @@ -601,9 +620,10 @@ type restoreSpecs struct {
// namePrefix appears in the name of the roachtest, i.e. `restore/{prefix}/{config}`.
namePrefix string

t test.Test
c cluster.Cluster
testName string
t test.Test
c cluster.Cluster
testName string
fingerprint int
}

func (sp *restoreSpecs) initTestName() {
Expand Down Expand Up @@ -690,6 +710,34 @@ func (sp *restoreSpecs) initRestorePerfMetrics(
}
}

// checkFingerprint runs a stripped fingerprint on all user tables in the cluster if the restore
// spec has a nonzero fingerprint.
//
// The fingerprinted key span runs from the table prefix of the smallest id to the end of the
// table prefix of the largest id found in system.namespace among rows whose "parentID" is
// greater than 1. The computed value is logged together with the time it took, and the test is
// failed through sp.t if it differs from sp.fingerprint. A zero sp.fingerprint means the spec
// has no expected value, in which case the check is skipped.
func (sp *restoreSpecs) checkFingerprint(ctx context.Context) {
	if sp.fingerprint == 0 {
		sp.t.L().Printf("Fingerprint not found in specs. Skipping fingerprint check.")
		return
	}

	conn, err := sp.c.ConnE(ctx, sp.t.L(), sp.c.Node(1)[0])
	require.NoError(sp.t, err)
	// The handle is opened solely for this check, so release it when we are done rather than
	// leaving its connections open for the rest of the test.
	defer conn.Close()
	sqlDB := sqlutils.MakeSQLRunner(conn)

	var minUserTableID, maxUserTableID uint32
	sqlDB.QueryRow(sp.t, `SELECT min(id), max(id) FROM system.namespace WHERE "parentID" >1`).Scan(
		&minUserTableID, &maxUserTableID)

	// Build a single key span covering every table id in [minUserTableID, maxUserTableID].
	codec := keys.MakeSQLCodec(roachpb.SystemTenantID)
	startKey := codec.TablePrefix(minUserTableID)
	endKey := codec.TablePrefix(maxUserTableID).PrefixEnd()

	startTime := timeutil.Now()
	var fingerprint int
	sqlDB.QueryRow(sp.t, `SELECT * FROM crdb_internal.fingerprint(ARRAY[$1::BYTES, $2::BYTES],true)`,
		startKey, endKey).Scan(&fingerprint)
	sp.t.L().Printf("Fingerprint is %d. Took %.2f minutes", fingerprint, timeutil.Since(startTime).Minutes())
	require.Equal(sp.t, sp.fingerprint, fingerprint, "user table fingerprint mismatch")
}

// exportToRoachperf exports a single perf metric for the given test to roachperf.
func exportToRoachperf(
ctx context.Context, t test.Test, c cluster.Cluster, testName string, metric int64,
Expand Down

0 comments on commit 5f85986

Please sign in to comment.