From 5c3bb1f409fa5131e3cac9b45de9b5aaf0728c56 Mon Sep 17 00:00:00 2001 From: ajwerner Date: Wed, 29 Mar 2023 15:18:32 -0400 Subject: [PATCH] sql: allow the in-memory version to be the next fence version In `SHOW CLUSTER SETTING version` we wait for the stored version to match the in-memory version. The in-memory version get pushed to the fence but the fence is not persisted, so, while migrations are ongoing, we won't see these values match. In practice this is a problem these days because the migrations take a long time. Epic: none Informs #99894 Release note (bug fix): Fixed a bug which could cause `SHOW CLUSTER SETTING version` to hang and return an opaque error while cluster finalization is ongoing. --- pkg/sql/BUILD.bazel | 1 + pkg/sql/show_cluster_setting.go | 47 ++++++++++++++++-- pkg/sql/show_cluster_setting_test.go | 72 ++++++++++++++++++++++++++++ 3 files changed, 115 insertions(+), 5 deletions(-) create mode 100644 pkg/sql/show_cluster_setting_test.go diff --git a/pkg/sql/BUILD.bazel b/pkg/sql/BUILD.bazel index 6cda5a08bb10..56435ef3f3cf 100644 --- a/pkg/sql/BUILD.bazel +++ b/pkg/sql/BUILD.bazel @@ -658,6 +658,7 @@ go_test( "sequence_test.go", "session_migration_test.go", "set_zone_config_test.go", + "show_cluster_setting_test.go", "show_create_all_tables_builtin_test.go", "show_create_table_test.go", "show_fingerprints_test.go", diff --git a/pkg/sql/show_cluster_setting.go b/pkg/sql/show_cluster_setting.go index 6c188eb4de6f..c906b5bc4196 100644 --- a/pkg/sql/show_cluster_setting.go +++ b/pkg/sql/show_cluster_setting.go @@ -94,14 +94,14 @@ func (p *planner) getCurrentEncodedVersionSettingValue( } localRawVal := []byte(s.Get(&st.SV)) - if !bytes.Equal(localRawVal, kvRawVal) { + if err := checkClusterSettingValuesAreEquivalent( + localRawVal, kvRawVal, + ); err != nil { // NB: errors.Wrapf(nil, ...) returns nil. // nolint:errwrap - return errors.Errorf( - "value differs between local setting (%v) and KV (%v); try again later (%v after %s)", - localRawVal, kvRawVal, ctx.Err(), timeutil.Since(tBegin)) + return errors.WithHintf(err, "try again later (%v after %v)", + ctx.Err(), timeutil.Since(tBegin)) } - res = string(kvRawVal) return nil }) @@ -113,6 +113,43 @@ func (p *planner) getCurrentEncodedVersionSettingValue( return res, nil } +// checkClusterSettingValuesAreEquivalent returns an error if the cluster +// setting values are not equivalent. Equivalent cluster setting values +// are either the byte-for-byte identical, or the local value is the successor +// to the kv value and the local value is a fence version. +// +// The in-memory version gets pushed to the fence but the fence is not persisted, +// so, while migrations are ongoing, we won't see these values match. In practice +// this is a problem these days because the migrations take a long time. +func checkClusterSettingValuesAreEquivalent(localRawVal, kvRawVal []byte) error { + if bytes.Equal(localRawVal, kvRawVal) { + return nil + } + type cv = clusterversion.ClusterVersion + maybeDecodeVersion := func(data []byte) (cv, any, bool) { + if len(data) == 0 { + return cv{}, data, false + } + var v cv + if err := protoutil.Unmarshal(data, &v); err != nil { + return cv{}, data, false + } + return v, v, true + } + decodedLocal, localVal, localOk := maybeDecodeVersion(localRawVal) + decodedKV, kvVal, kvOk := maybeDecodeVersion(kvRawVal) + if localOk && kvOk && decodedLocal.Internal%2 == 1 /* isFence */ { + predecessor := decodedLocal + predecessor.Internal-- + if predecessor.Equal(decodedKV) { + return nil + } + } + return errors.Errorf( + "value differs between local setting (%v) and KV (%v)", + localVal, kvVal) +} + func (p *planner) ShowClusterSetting( ctx context.Context, n *tree.ShowClusterSetting, ) (planNode, error) { diff --git a/pkg/sql/show_cluster_setting_test.go b/pkg/sql/show_cluster_setting_test.go new file mode 100644 index 000000000000..d8ef812831eb --- /dev/null +++ b/pkg/sql/show_cluster_setting_test.go @@ -0,0 +1,72 @@ +// Copyright 2023 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package sql + +import ( + "testing" + + "github.com/cockroachdb/cockroach/pkg/clusterversion" + "github.com/cockroachdb/cockroach/pkg/roachpb" + "github.com/cockroachdb/cockroach/pkg/util/leaktest" + "github.com/cockroachdb/cockroach/pkg/util/protoutil" + "github.com/stretchr/testify/require" +) + +// TestCheckClusterSettingValuesAreEquivalent exercises the logic used to +// decide whether `SHOW CLUSTER SETTING version` can return the value it read. +func TestCheckClusterSettingValuesAreEquivalent(t *testing.T) { + defer leaktest.AfterTest(t)() + encode := func(t *testing.T, s string) []byte { + v, err := roachpb.ParseVersion(s) + require.NoError(t, err) + cv := clusterversion.ClusterVersion{Version: v} + data, err := protoutil.Marshal(&cv) + require.NoError(t, err) + return data + } + for _, tc := range []struct { + local []byte + kv []byte + exp string + }{ + { // 0 + local: encode(t, "22.2-10"), + kv: encode(t, "22.2-10"), + }, + { // 1 + local: encode(t, "22.2-12"), + kv: encode(t, "22.2-11"), + exp: "value differs between local setting (22.2-12) and KV (22.2-11)", + }, + { // 2 + local: encode(t, "22.2-11"), + kv: encode(t, "22.2-10"), + }, + { // 3 + local: encode(t, "22.2-11"), + kv: []byte("abc"), + exp: "value differs between local setting (22.2-11) and KV ([97 98 99])", + }, + { // 4 + kv: encode(t, "22.2-11"), + exp: "value differs between local setting ([]) and KV (22.2-11)", + }, + } { + t.Run("", func(t *testing.T) { + err := checkClusterSettingValuesAreEquivalent(tc.local, tc.kv) + if tc.exp == "" { + require.NoError(t, err) + } else { + require.EqualError(t, err, tc.exp) + } + }) + } +}