diff --git a/pkg/kv/kvserver/allocator/plan/util.go b/pkg/kv/kvserver/allocator/plan/util.go
index 33e066a04f58..974d9b45392e 100644
--- a/pkg/kv/kvserver/allocator/plan/util.go
+++ b/pkg/kv/kvserver/allocator/plan/util.go
@@ -17,7 +17,6 @@ import (
     "github.com/cockroachdb/cockroach/pkg/kv/kvserver/allocator/allocatorimpl"
     "github.com/cockroachdb/cockroach/pkg/raft"
     "github.com/cockroachdb/cockroach/pkg/roachpb"
-    "github.com/cockroachdb/cockroach/pkg/util/log"
     "github.com/cockroachdb/errors"
     "github.com/cockroachdb/redact"
 )
@@ -36,51 +35,6 @@ func ReplicationChangesForRebalance(
     rebalanceTargetType allocatorimpl.TargetReplicaType,
 ) (chgs []kvpb.ReplicationChange, performingSwap bool, err error) {
     rdesc, found := desc.GetReplicaDescriptor(addTarget.StoreID)
-    if rebalanceTargetType == allocatorimpl.VoterTarget && numExistingVoters == 1 {
-        // If there's only one replica, the removal target is the
-        // leaseholder and this is unsupported and will fail. However,
-        // this is also the only way to rebalance in a single-replica
-        // range. If we try the atomic swap here, we'll fail doing
-        // nothing, and so we stay locked into the current distribution
-        // of replicas. (Note that maybeTransferLeaseAway above will not
-        // have found a target, and so will have returned (false, nil).
-        //
-        // Do the best thing we can, which is carry out the addition
-        // only, which should succeed, and the next time we touch this
-        // range, we will have one more replica and hopefully it will
-        // take the lease and remove the current leaseholder.
-        //
-        // It's possible that "rebalancing deadlock" can occur in other
-        // scenarios, it's really impossible to tell from the code given
-        // the constraints we support. However, the lease transfer often
-        // does not happen spuriously, and we can't enter dangerous
-        // configurations sporadically, so this code path is only hit
-        // when we know it's necessary, picking the smaller of two evils.
-        //
-        // See https://github.com/cockroachdb/cockroach/issues/40333.
-        log.KvDistribution.Infof(ctx, "can't swap replica due to lease; falling back to add")
-
-        // Even when there is only 1 existing voter, there may be other replica
-        // types in the range. Check if the add target already has a replica, if so
-        // it must be a non-voter or the rebalance is invalid.
-        if found && rdesc.Type == roachpb.NON_VOTER {
-            // The receiving store already has a non-voting replica. Instead of just
-            // adding a voter to the receiving store, we *must* promote the non-voting
-            // replica to a voter.
-            chgs = kvpb.ReplicationChangesForPromotion(addTarget)
-        } else if !found {
-            chgs = []kvpb.ReplicationChange{
-                {ChangeType: roachpb.ADD_VOTER, Target: addTarget},
-            }
-        } else {
-            return nil, false, errors.AssertionFailedf(
-                "invalid rebalancing decision: trying to"+
-                    " move voter to a store that already has a replica %s for the range", rdesc,
-            )
-        }
-        return chgs, false, err
-    }
-
     switch rebalanceTargetType {
     case allocatorimpl.VoterTarget:
         // Check if the target being added already has a non-voting replica.
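With the early return removed, ReplicationChangesForRebalance falls through to the ordinary voter-rebalance path even when a range has a single voter, so the allocator proposes the add/remove pair as one atomic change rather than an addition-only fallback. The sketch below is illustrative only and not part of the patch: the helper name and hard-coded targets are assumptions, but it uses the same kvpb/roachpb types that appear in the hunk above to show the shape of that paired change.

```go
// Minimal sketch, not part of the patch: the paired voter change that a
// single-voter rebalance now proposes, expressed with the types visible in
// ReplicationChangesForRebalance. Targets are hard-coded for illustration.
package main

import (
	"fmt"

	"github.com/cockroachdb/cockroach/pkg/kv/kvpb"
	"github.com/cockroachdb/cockroach/pkg/roachpb"
)

// singleVoterRebalanceChanges pairs ADD_VOTER and REMOVE_VOTER so the
// membership change is carried out atomically instead of add-only.
func singleVoterRebalanceChanges(add, remove roachpb.ReplicationTarget) []kvpb.ReplicationChange {
	return []kvpb.ReplicationChange{
		{ChangeType: roachpb.ADD_VOTER, Target: add},
		{ChangeType: roachpb.REMOVE_VOTER, Target: remove},
	}
}

func main() {
	add := roachpb.ReplicationTarget{NodeID: 2, StoreID: 2}
	remove := roachpb.ReplicationTarget{NodeID: 1, StoreID: 1}
	fmt.Printf("%+v\n", singleVoterRebalanceChanges(add, remove))
}
```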
diff --git a/pkg/kv/kvserver/asim/tests/testdata/non_rand/one_voter b/pkg/kv/kvserver/asim/tests/testdata/non_rand/one_voter
index 808a32001bf7..b8cc9bc45e59 100644
--- a/pkg/kv/kvserver/asim/tests/testdata/non_rand/one_voter
+++ b/pkg/kv/kvserver/asim/tests/testdata/non_rand/one_voter
@@ -14,18 +14,18 @@ plot stat=replicas
 ----
 ----
- 100.00 ┼──────╭───────────────╮
-  93.33 ┤ ╭╯ ╰╮ ╰─╮
-  86.67 ┤ │ ╰╮ ╭╰╮─────────╮
-  80.00 ┤ │ │ ╭╯ │ ╭─────────╮╮
-  73.33 ┤ ╭╯ ╰╮ ╭─╯ ╰╮ ╭╯ ╰╮ ╭╮╰────╮─────╭╮
-  66.67 ┤ │ ╰╮ │ ╰╮ ╭╯ ╰╮ ╭─╯╰╯ ╰─╮╭──╯╰───────╮─╮
-  60.00 ┤ ╭╯ │ ╭─╯ │ ╭──╯ ╰╮│ ╰╯ ╰╯ ╰──╰─────────────
-  53.33 ┤ │ ╰╮ ╭╯ ╰──╯ ╰╯ ╰───╯╰
-  46.67 ┤ ╭╯ │ │
-  40.00 ┤ │ ╰╮ ╭╯
-  33.33 ┤ │ │╭╯
-  26.67 ┤╭╯ ╰╯
+ 100.00 ┼╮
+  93.33 ┤│
+  86.67 ┤╰╮
+  80.00 ┤ │
+  73.33 ┤ ╰╮
+  66.67 ┤  │
+  60.00 ┤  │
+  53.33 ┤  ╰────────────────────────────────────────────────────────────────────────────
+  46.67 ┤  ╭────────────────────────────────────────────────────────────────────────────
+  40.00 ┤  │
+  33.33 ┤  │
+  26.67 ┤ ╭╯
   20.00 ┤ │
   13.33 ┤╭╯
    6.67 ┤│
    0.00 ┼╯
@@ -38,22 +38,22 @@ plot stat=leases
 ----
 ----
- 100.00 ┼──────╮ ╭───────╮
-  93.33 ┤ ╰╮ ╭╯ ╰╮
-  86.67 ┤ │ │ │ ╭──╮
-  80.00 ┤ ╰╮ ╭╯ ╰╮ ╭─╯ ╰╮
-  73.33 ┤ │ │ │ ╭╯ ╰╮ ╭────╮ ╭───╮
-  66.67 ┤ ╰╮╭╯ ╰╮ ╭╯ ╰╮ ╭╯ ╰╮ ╭╯ ╰╮
-  60.00 ┤ ││ ╰╮│ ╰╮╭╯ ╰╮╭╯ ╰╮ ╭───╮ ╭───╮
-  53.33 ┤ ╭╯ │╯ ╭╯ ╰╮ ╰╭─╯ ╰──╮─╯ ╰─╭────
-  46.67 ┤ │╮ ╰╮ │╮ ╭│ ╭╯─╮ ╭─╯╰─╮ ╭─╯────
-  40.00 ┤ ││ ╭╯│ ╭╯╰╮ ╭╯╰╮ ╭╯ ╰───╯ ╰───╯
-  33.33 ┤ ╭╯╰╮ ╭╯ ╰╮ ╭╯ ╰╮ ╭╯ ╰╮ ╭╯
-  26.67 ┤ │ │ │ ╰╮ ╭╯ ╰────╯ ╰───╯
-  20.00 ┤ ╭╯ ╰╮ ╭╯ ╰─╮ ╭╯
-  13.33 ┤ │ │ ╭╯ ╰──╯
-   6.67 ┤ ╭╯ ╰╮ │
-   0.00 ┼──────╯ ╰───────╯
+ 100.00 ┼╮
+  93.33 ┤│
+  86.67 ┤╰╮
+  80.00 ┤ │
+  73.33 ┤ ╰╮
+  66.67 ┤  │
+  60.00 ┤  │
+  53.33 ┤  ╰────────────────────────────────────────────────────────────────────────────
+  46.67 ┤  ╭────────────────────────────────────────────────────────────────────────────
+  40.00 ┤  │
+  33.33 ┤  │
+  26.67 ┤ ╭╯
+  20.00 ┤ │
+  13.33 ┤╭╯
+   6.67 ┤│
+   0.00 ┼╯
                                          leases
 ----
 ----
diff --git a/pkg/kv/kvserver/asim/tests/testdata/rand/rand_ranges b/pkg/kv/kvserver/asim/tests/testdata/rand/rand_ranges
index a728c61fce1b..d1c29de4bf6a 100644
--- a/pkg/kv/kvserver/asim/tests/testdata/rand/rand_ranges
+++ b/pkg/kv/kvserver/asim/tests/testdata/rand/rand_ranges
@@ -177,48 +177,7 @@ configurations generated using seed 2643318057788968173
   randomized ranges with placement_type=random, ranges=944, key_space=150098, replication_factor=1, bytes=0
   basic load with rw_ratio=0.00, rate=0.00, skewed_access=false, min_block_size=1, max_block_size=1, min_key=1, max_key=200000
   number of mutation events=0, number of assertion events=0
-initial state at 2022-03-21 11:00:00:
-  stores(15)=[s1n1=(replicas(64)),s2n2=(replicas(51)),s3n3=(replicas(12)),s4n4=(replicas(25)),s5n5=(replicas(115)),s6n6=(replicas(90)),s7n7=(replicas(114)),s8n8=(replicas(25)),s9n9=(replicas(26)),s10n10=(replicas(64)),s11n11=(replicas(0)),s12n12=(replicas(114)),s13n13=(replicas(103)),s14n14=(replicas(51)),s15n15=(replicas(90))]
-topology:
-US
-  US_1
-  └── [1 2 3 4 5]
-  US_2
-  └── [6 7 8 9 10]
-  US_3
-  └── [11 12 13 14 15]
-no events were scheduled
-sample2: failed assertion
-  conformance unavailable=0 under=0 over=0 violating=0 lease-violating=0 lease-less-preferred=0
-  actual unavailable=0 under=0, over=27 violating=0 lease-violating=0 lease-less-preferred=0
-over replicated:
-  r63:00000{09858-10017} [(n1,s1):4, (n9,s9):5] applying ttl_seconds=0 num_replicas=1 num_voters=1
-  r77:0000012{084-243} [(n4,s4):2, (n9,s9):3] applying ttl_seconds=0 num_replicas=1 num_voters=1
-  r174:0000027{507-666} [(n6,s6):4, (n9,s9):5] applying ttl_seconds=0 num_replicas=1 num_voters=1
-  r192:0000030{369-528} [(n6,s6):3, (n9,s9):4] applying ttl_seconds=0 num_replicas=1 num_voters=1
-  r237:0000037{524-683} [(n5,s5):3, (n9,s9):4] applying ttl_seconds=0 num_replicas=1 num_voters=1
-  r255:0000040{386-545} [(n8,s8):2, (n9,s9):3] applying ttl_seconds=0 num_replicas=1 num_voters=1
-  r348:0000055{173-332} [(n1,s1):1, (n9,s9):2] applying ttl_seconds=0 num_replicas=1 num_voters=1
-  r370:0000058{671-830} [(n5,s5):3, (n9,s9):4] applying ttl_seconds=0 num_replicas=1 num_voters=1
-  r378:00000{59943-60102} [(n8,s8):3, (n9,s9):4] applying ttl_seconds=0 num_replicas=1 num_voters=1
-  r387:0000061{374-533} [(n12,s12):3, (n9,s9):4] applying ttl_seconds=0 num_replicas=1 num_voters=1
-  r419:0000066{462-621} [(n1,s1):6, (n9,s9):7] applying ttl_seconds=0 num_replicas=1 num_voters=1
-  r483:0000076{638-797} [(n2,s2):4, (n9,s9):5] applying ttl_seconds=0 num_replicas=1 num_voters=1
-  r513:0000081{408-567} [(n6,s6):3, (n9,s9):4] applying ttl_seconds=0 num_replicas=1 num_voters=1
-  r599:0000095{082-241} [(n11,s11):2, (n9,s9):3] applying ttl_seconds=0 num_replicas=1 num_voters=1
-  r621:0000098{580-739} [(n14,s14):2, (n9,s9):3] applying ttl_seconds=0 num_replicas=1 num_voters=1
-  r637:0000101{124-283} [(n13,s13):4, (n9,s9):5] applying ttl_seconds=0 num_replicas=1 num_voters=1
-  r644:0000102{237-396} [(n2,s2):2, (n9,s9):3] applying ttl_seconds=0 num_replicas=1 num_voters=1
-  r645:0000102{396-555} [(n3,s3):2, (n9,s9):3] applying ttl_seconds=0 num_replicas=1 num_voters=1
-  r681:0000108{120-279} [(n12,s12):3, (n9,s9):4] applying ttl_seconds=0 num_replicas=1 num_voters=1
-  r693:0000110{028-187} [(n14,s14):3, (n9,s9):4] applying ttl_seconds=0 num_replicas=1 num_voters=1
-  r712:0000113{049-208} [(n11,s11):3, (n9,s9):4] applying ttl_seconds=0 num_replicas=1 num_voters=1
-  r732:0000116{229-388} [(n5,s5):3, (n9,s9):4] applying ttl_seconds=0 num_replicas=1 num_voters=1
-  r740:0000117{501-660} [(n12,s12):3, (n9,s9):4] applying ttl_seconds=0 num_replicas=1 num_voters=1
-  r763:0000121{158-317} [(n10,s10):4, (n9,s9):5] applying ttl_seconds=0 num_replicas=1 num_voters=1
-  r796:0000126{405-564} [(n10,s10):4, (n9,s9):5] applying ttl_seconds=0 num_replicas=1 num_voters=1
-  r805:0000127{836-995} [(n5,s5):3, (n9,s9):4] applying ttl_seconds=0 num_replicas=1 num_voters=1
-  r928:0000147{393-552} [(n12,s12):2, (n9,s9):3] applying ttl_seconds=0 num_replicas=1 num_voters=1
+sample2: pass
 ----------------------------------
 sample3: start running
 configurations generated using seed 6972490225919430754
@@ -227,47 +186,5 @@ configurations generated using seed 6972490225919430754
   randomized ranges with placement_type=random, ranges=479, key_space=199954, replication_factor=1, bytes=0
   basic load with rw_ratio=0.00, rate=0.00, skewed_access=false, min_block_size=1, max_block_size=1, min_key=1, max_key=200000
   number of mutation events=0, number of assertion events=0
-initial state at 2022-03-21 11:00:00:
-  stores(15)=[s1n1=(replicas(27)),s2n2=(replicas(1)),s3n3=(replicas(40)),s4n4=(replicas(13)),s5n5=(replicas(26)),s6n6=(replicas(13)),s7n7=(replicas(60)),s8n8=(replicas(60)),s9n9=(replicas(53)),s10n10=(replicas(14)),s11n11=(replicas(59)),s12n12=(replicas(6)),s13n13=(replicas(13)),s14n14=(replicas(40)),s15n15=(replicas(54))]
-topology:
-US
-  US_1
-  └── [1 2 3 4 5]
-  US_2
-  └── [6 7 8 9 10]
-  US_3
-  └── [11 12 13 14 15]
-no events were scheduled
-sample3: failed assertion
-  conformance unavailable=0 under=0 over=0 violating=0 lease-violating=0 lease-less-preferred=0
-  actual unavailable=0 under=0, over=28 violating=0 lease-violating=0 lease-less-preferred=0
-over replicated:
-  r17:000000{6672-7089} [(n10,s10):4, (n5,s5):5] applying ttl_seconds=0 num_replicas=1 num_voters=1
-  r39:000001{5846-6263} [(n8,s8):6, (n1,s1):7] applying ttl_seconds=0 num_replicas=1 num_voters=1
-  r88:0000036{279-696} [(n6,s6):6, (n14,s14):7] applying ttl_seconds=0 num_replicas=1 num_voters=1
-  r93:0000038{364-781} [(n14,s14):5, (n8,s8):6] applying ttl_seconds=0 num_replicas=1 num_voters=1
-  r115:0000047{538-955} [(n8,s8):7, (n10,s10):8] applying ttl_seconds=0 num_replicas=1 num_voters=1
-  r128:000005{2959-3376} [(n6,s6):4, (n1,s1):5] applying ttl_seconds=0 num_replicas=1 num_voters=1
-  r137:000005{6712-7129} [(n1,s1):7, (n3,s3):8] applying ttl_seconds=0 num_replicas=1 num_voters=1
-  r139:0000057{546-963} [(n6,s6):3, (n8,s8):4] applying ttl_seconds=0 num_replicas=1 num_voters=1
-  r197:000008{1732-2149} [(n14,s14):5, (n10,s10):6] applying ttl_seconds=0 num_replicas=1 num_voters=1
-  r198:0000082{149-566} [(n6,s6):2, (n14,s14):3] applying ttl_seconds=0 num_replicas=1 num_voters=1
-  r201:0000083{400-817} [(n10,s10):4, (n1,s1):5] applying ttl_seconds=0 num_replicas=1 num_voters=1
-  r207:000008{5902-6319} [(n6,s6):4, (n14,s14):5] applying ttl_seconds=0 num_replicas=1 num_voters=1
-  r211:0000087{570-987} [(n10,s10):3, (n4,s4):4] applying ttl_seconds=0 num_replicas=1 num_voters=1
-  r214:000008{8821-9238} [(n1,s1):5, (n10,s10):6] applying ttl_seconds=0 num_replicas=1 num_voters=1
-  r219:000009{0906-1323} [(n8,s8):4, (n1,s1):5] applying ttl_seconds=0 num_replicas=1 num_voters=1
-  r230:0000095{493-910} [(n8,s8):4, (n10,s10):5] applying ttl_seconds=0 num_replicas=1 num_voters=1
-  r244:0000101{331-748} [(n10,s10):2, (n1,s1):3] applying ttl_seconds=0 num_replicas=1 num_voters=1
-  r253:0000105{084-501} [(n10,s10):2, (n1,s1):3] applying ttl_seconds=0 num_replicas=1 num_voters=1
-  r267:000011{0922-1339} [(n14,s14):5, (n10,s10):6] applying ttl_seconds=0 num_replicas=1 num_voters=1
-  r349:0000145{116-533} [(n10,s10):3, (n9,s9):4] applying ttl_seconds=0 num_replicas=1 num_voters=1
-  r353:000014{6784-7201} [(n10,s10):5, (n1,s1):6] applying ttl_seconds=0 num_replicas=1 num_voters=1
-  r403:000016{7634-8051} [(n14,s14):4, (n8,s8):5] applying ttl_seconds=0 num_replicas=1 num_voters=1
-  r421:0000175{140-557} [(n12,s12):4, (n10,s10):5] applying ttl_seconds=0 num_replicas=1 num_voters=1
-  r425:000017{6808-7225} [(n14,s14):4, (n1,s1):5] applying ttl_seconds=0 num_replicas=1 num_voters=1
-  r431:0000179{310-727} [(n12,s12):5, (n8,s8):6] applying ttl_seconds=0 num_replicas=1 num_voters=1
-  r433:0000180{144-561} [(n10,s10):4, (n1,s1):5] applying ttl_seconds=0 num_replicas=1 num_voters=1
-  r455:0000189{318-735} [(n6,s6):3, (n10,s10):4] applying ttl_seconds=0 num_replicas=1 num_voters=1
-  r477:0000198{492-909} [(n1,s1):4, (n7,s7):5] applying ttl_seconds=0 num_replicas=1 num_voters=1
+sample3: pass
 ----------------------------------
diff --git a/pkg/sql/scatter_test.go b/pkg/sql/scatter_test.go
index 7ce8ccc67395..d5099b496108 100644
--- a/pkg/sql/scatter_test.go
+++ b/pkg/sql/scatter_test.go
@@ -17,7 +17,9 @@ import (
     "testing"

     "github.com/cockroachdb/cockroach/pkg/base"
+    "github.com/cockroachdb/cockroach/pkg/config/zonepb"
     "github.com/cockroachdb/cockroach/pkg/roachpb"
+    "github.com/cockroachdb/cockroach/pkg/server"
     "github.com/cockroachdb/cockroach/pkg/sql/catalog/desctestutils"
     "github.com/cockroachdb/cockroach/pkg/sql/randgen"
     "github.com/cockroachdb/cockroach/pkg/testutils"
@@ -27,6 +29,8 @@
"github.com/cockroachdb/cockroach/pkg/util/leaktest" "github.com/cockroachdb/cockroach/pkg/util/log" "github.com/cockroachdb/errors" + "github.com/gogo/protobuf/proto" + "github.com/stretchr/testify/require" ) func TestScatterRandomizeLeases(t *testing.T) { @@ -176,3 +180,98 @@ func TestScatterResponse(t *testing.T) { t.Fatalf("expected %d rows, but got %d", e, a) } } + +// TestScatterWithOneVoter tests that the scatter command works when the +// replication factor is set to 1. We expect that the total number of replicas +// remains unchanged and that the scattering store loses some replicas. Note we +// don't assert on the final distribution being even across all stores, scatter +// promises randomness, not necessarily uniformity. +func TestScatterWithOneVoter(t *testing.T) { + defer leaktest.AfterTest(t)() + defer log.Scope(t).Close(t) + + skip.UnderRace(t) // Too slow under stressrace. + skip.UnderDeadlock(t) + skip.UnderShort(t) + + zcfg := zonepb.DefaultZoneConfig() + zcfg.NumReplicas = proto.Int32(1) + + tc := serverutils.StartCluster(t, 3, base.TestClusterArgs{ + ReplicationMode: base.ReplicationManual, + ServerArgs: base.TestServerArgs{ + Knobs: base.TestingKnobs{ + Server: &server.TestingKnobs{ + DefaultZoneConfigOverride: &zcfg, + }, + }, + }, + }) + defer tc.Stopper().Stop(context.Background()) + + sqlutils.CreateTable( + t, tc.ServerConn(0), "t", + "k INT PRIMARY KEY, v INT", + 500, + sqlutils.ToRowFn(sqlutils.RowIdxFn, sqlutils.RowModuloFn(10)), + ) + + r := sqlutils.MakeSQLRunner(tc.ServerConn(0)) + + // Create 49 splits, for 50 ranges in the test table. + r.Exec(t, "ALTER TABLE test.t SPLIT AT (SELECT i*10 FROM generate_series(1, 49) AS g(i))") + + getReplicaCounts := func() (map[int]int, int, error) { + rows := r.Query(t, ` + WITH ranges_info AS ( + SHOW RANGES FROM TABLE test.t + ) + SELECT + store_id, + count(*) AS replica_count + FROM + ( + SELECT + unnest(replicas) AS store_id + FROM + ranges_info + ) AS store_replicas + GROUP BY + store_id;`) + replicaCounts := make(map[int]int) + totalReplicas := 0 + numRows := 0 + for ; rows.Next(); numRows++ { + var storeID, replicaCount int + if err := rows.Scan(&storeID, &replicaCount); err != nil { + return nil, 0, err + } + replicaCounts[storeID] = replicaCount + totalReplicas += replicaCount + } + if err := rows.Err(); err != nil { + return nil, 0, err + } + return replicaCounts, totalReplicas, nil + } + + oldReplicaCounts, oldTotalReplicas, err := getReplicaCounts() + if err != nil { + t.Fatal(err) + } + + // Expect that the number of replicas on store 1 to have changed. We can't + // assert that the distribution will be even across all three stores, but s1 + // (the initial leaseholder and replica) should have a different number of + // replicas than before. rebalancing is otherwise disabled in this test, so + // the only replica movements are from the scatter. + r.Exec(t, "ALTER TABLE test.t SCATTER") + newReplicaCounts, newTotalReplicas, err := getReplicaCounts() + require.NoError(t, err) + + require.Equal(t, oldTotalReplicas, newTotalReplicas, + "expected total replica count to remain the same post-scatter, "+ + "old replica counts(%d): %v, new replica counts(%d): %v", + oldTotalReplicas, oldReplicaCounts, newTotalReplicas, newReplicaCounts) + require.NotEqual(t, oldReplicaCounts[1], newReplicaCounts[1]) +}