From ba597f7c50db9005a3d5f66250f6e5901c5c2be4 Mon Sep 17 00:00:00 2001 From: Aditya Maru Date: Mon, 11 Apr 2022 14:08:11 -0400 Subject: [PATCH] protectedts: fix panic when fetching protectedts records Previously, we incorrectly assumed that all records in the `system.protectedts_records` table would have a non NULL target column. While this is enforced for all protected timestamp records written in a 22.1+ cluster, this is not true for records that might have been prior to the 22.1 cluster version being finalized. This change makes the method responsible for reading protectedts records more defensive when encountering a NULL target column. Fixes: #79684 Release note: None --- .../protectedts/ptstorage/BUILD.bazel | 3 + .../kvserver/protectedts/ptstorage/storage.go | 8 ++- .../protectedts/ptstorage/storage_test.go | 66 +++++++++++++++++++ 3 files changed, 76 insertions(+), 1 deletion(-) diff --git a/pkg/kv/kvserver/protectedts/ptstorage/BUILD.bazel b/pkg/kv/kvserver/protectedts/ptstorage/BUILD.bazel index 1879f9fa65dd..3ea2ab50135a 100644 --- a/pkg/kv/kvserver/protectedts/ptstorage/BUILD.bazel +++ b/pkg/kv/kvserver/protectedts/ptstorage/BUILD.bazel @@ -42,6 +42,8 @@ go_test( shard_count = 16, deps = [ "//pkg/base", + "//pkg/clusterversion", + "//pkg/jobs/jobsprotectedts", "//pkg/keys", "//pkg/kv", "//pkg/kv/kvserver/protectedts", @@ -62,6 +64,7 @@ go_test( "//pkg/sql/tests", "//pkg/testutils", "//pkg/testutils/serverutils", + "//pkg/testutils/sqlutils", "//pkg/testutils/testcluster", "//pkg/util/hlc", "//pkg/util/log", diff --git a/pkg/kv/kvserver/protectedts/ptstorage/storage.go b/pkg/kv/kvserver/protectedts/ptstorage/storage.go index a908a8faed5a..705133da306b 100644 --- a/pkg/kv/kvserver/protectedts/ptstorage/storage.go +++ b/pkg/kv/kvserver/protectedts/ptstorage/storage.go @@ -415,7 +415,13 @@ func rowToRecord( if !useDeprecatedProtectedTSStorage(ctx, st, knobs) { target := &ptpb.Target{} - if err := protoutil.Unmarshal([]byte(*row[6].(*tree.DBytes)), target); err != nil { + targetDBytes, ok := row[6].(*tree.DBytes) + if !ok { + // We are reading a pre-22.1 protected timestamp record that has a NULL + // target column, so there is nothing more to do. + return nil + } + if err := protoutil.Unmarshal([]byte(*targetDBytes), target); err != nil { return errors.Wrapf(err, "failed to unmarshal target for %v", r.ID) } r.Target = target diff --git a/pkg/kv/kvserver/protectedts/ptstorage/storage_test.go b/pkg/kv/kvserver/protectedts/ptstorage/storage_test.go index b808de2a20f3..eb6938fe375e 100644 --- a/pkg/kv/kvserver/protectedts/ptstorage/storage_test.go +++ b/pkg/kv/kvserver/protectedts/ptstorage/storage_test.go @@ -20,9 +20,12 @@ import ( "sort" "strconv" "testing" + "time" "unsafe" "github.com/cockroachdb/cockroach/pkg/base" + "github.com/cockroachdb/cockroach/pkg/clusterversion" + "github.com/cockroachdb/cockroach/pkg/jobs/jobsprotectedts" "github.com/cockroachdb/cockroach/pkg/keys" "github.com/cockroachdb/cockroach/pkg/kv" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/protectedts" @@ -30,6 +33,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/kv/kvserver/protectedts/ptstorage" "github.com/cockroachdb/cockroach/pkg/roachpb" "github.com/cockroachdb/cockroach/pkg/security" + "github.com/cockroachdb/cockroach/pkg/server" "github.com/cockroachdb/cockroach/pkg/spanconfig" "github.com/cockroachdb/cockroach/pkg/sql" "github.com/cockroachdb/cockroach/pkg/sql/catalog" @@ -41,6 +45,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/sql/tests" "github.com/cockroachdb/cockroach/pkg/testutils" "github.com/cockroachdb/cockroach/pkg/testutils/serverutils" + "github.com/cockroachdb/cockroach/pkg/testutils/sqlutils" "github.com/cockroachdb/cockroach/pkg/testutils/testcluster" "github.com/cockroachdb/cockroach/pkg/util/hlc" "github.com/cockroachdb/cockroach/pkg/util/log" @@ -803,6 +808,67 @@ func TestErrorsFromSQL(t *testing.T) { }), "failed to read records: boom") } +// TestNullTargetColumnResolution is a regression test that ensures that +// pre-22.1 protected timestamp records that will have a NULL target column are +// still resolved correctly by the protectedts provider. Previously, this would +// panic due to an incorrect assumption that the column would be non-nullable. +func TestNullTargetColumnResolution(t *testing.T) { + ctx := context.Background() + params, _ := tests.CreateTestServerParams() + + disableUpgradeCh := make(chan struct{}) + params.Knobs = base.TestingKnobs{ + Server: &server.TestingKnobs{ + BinaryVersionOverride: clusterversion.ByKey( + clusterversion.AlterSystemProtectedTimestampAddColumn - 1), + DisableAutomaticVersionUpgrade: disableUpgradeCh, + }, + } + + tc := testcluster.StartTestCluster(t, 1, base.TestClusterArgs{ServerArgs: params}) + defer tc.Stopper().Stop(ctx) + + s := tc.Server(0) + execCfg := s.ExecutorConfig().(sql.ExecutorConfig) + ptp := execCfg.ProtectedTimestampProvider + sqlDB := sqlutils.MakeSQLRunner(tc.Conns[0]) + + // Write an old style protected timestamp record with a null target column. + ts := hlc.Timestamp{WallTime: time.Now().UnixNano()} + recordID := uuid.MakeV4() + rec := jobsprotectedts.MakeRecord(recordID, int64(1), ts, []roachpb.Span{keys.EverythingSpan}, + jobsprotectedts.Jobs, nil /* target */) + require.NoError(t, execCfg.DB.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error { + return ptp.Protect(ctx, txn, rec) + })) + + // Close the channel so that the cluster version is upgraded. + close(disableUpgradeCh) + // Check the cluster version is bumped to newVersion. + testutils.SucceedsSoon(t, func() error { + var version string + sqlDB.QueryRow(t, "SELECT value FROM system.settings WHERE name = 'version'").Scan(&version) + var v clusterversion.ClusterVersion + if err := protoutil.Unmarshal([]byte(version), &v); err != nil { + return err + } + version = v.String() + if version != clusterversion.TestingBinaryVersion.String() { + return errors.Errorf("cluster version is still %s, should be %s", version, clusterversion.TestingBinaryVersion.String()) + } + return nil + }) + + // Check the record we wrote above is correctly unmarshalled. + require.NoError(t, execCfg.DB.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error { + state, err := ptp.GetState(ctx, txn) + require.NoError(t, err) + + require.Len(t, state.Records, 1) + return nil + })) +} + // wrappedInternalExecutor allows errors to be injected in SQL execution. type wrappedInternalExecutor struct { wrapped sqlutil.InternalExecutor