From 267099d448b7014b305aaa17f152038a2d225a55 Mon Sep 17 00:00:00 2001 From: Tobias Grieger Date: Tue, 25 Apr 2023 13:17:25 +0200 Subject: [PATCH] kvserver: tighten and de-fatal replicaGC checks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now we return most failed checks as errors and report errors to sentry (but without crashing). `TestStoreRemoveReplicaDestroy` was updated with somewhat better coverage of these error conditions, which is now I also slightly rearranged the checks; it makes sense to me to check that the store doesn't have a different Replica, rather than using a potentially old Replica and failing the nextReplicaID check. old order new order - assert replica is inited - assert replica is inited - assert opts are compatible with each other - assert opts are compatible with each other - maybe return early - maybe return early - assert destroy status - assert destroy status - assert replica id < nextReplicaID ───┐ ┌─► - *assert same replica in store* - assert replica is inited (again) └──┼─► - *assert replica id < nextReplicaID* - set destroy status │ - set destroy status - assert same replica in store────────────┘ - remainder - remainder Closes https://github.com/cockroachdb/cockroach/issues/94813. Epic: None Release note: None --- pkg/kv/kvserver/replica_gc_queue.go | 4 ++ pkg/kv/kvserver/store_remove_replica.go | 74 +++++++++++++------------ pkg/kv/kvserver/store_test.go | 26 +++++++-- 3 files changed, 65 insertions(+), 39 deletions(-) diff --git a/pkg/kv/kvserver/replica_gc_queue.go b/pkg/kv/kvserver/replica_gc_queue.go index ef4fb2f4a317..5491e911c968 100644 --- a/pkg/kv/kvserver/replica_gc_queue.go +++ b/pkg/kv/kvserver/replica_gc_queue.go @@ -21,6 +21,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/spanconfig" "github.com/cockroachdb/cockroach/pkg/util/hlc" "github.com/cockroachdb/cockroach/pkg/util/log" + "github.com/cockroachdb/cockroach/pkg/util/log/logcrash" "github.com/cockroachdb/cockroach/pkg/util/metric" "github.com/cockroachdb/errors" "go.etcd.io/raft/v3" @@ -323,6 +324,9 @@ func (rgcq *replicaGCQueue) process( if err := repl.store.RemoveReplica(ctx, repl, nextReplicaID, RemoveOptions{ DestroyData: true, }); err != nil { + // Should never get an error from RemoveReplica. + const format = "error during replicaGC: %v" + logcrash.ReportOrPanic(ctx, &repl.store.ClusterSettings().SV, format, err) return false, err } } else { diff --git a/pkg/kv/kvserver/store_remove_replica.go b/pkg/kv/kvserver/store_remove_replica.go index f9aaddc6c108..75b5951d4710 100644 --- a/pkg/kv/kvserver/store_remove_replica.go +++ b/pkg/kv/kvserver/store_remove_replica.go @@ -88,64 +88,70 @@ func (s *Store) removeInitializedReplicaRaftMuLocked( } - // Sanity checks before committing to the removal by setting the - // destroy status. - var desc *roachpb.RangeDescriptor - { + // Run sanity checks and on success commit to the removal by setting the + // destroy status. If (nil, nil) is returned, there's nothing to do. + desc, err := func() (*roachpb.RangeDescriptor, error) { rep.readOnlyCmdMu.Lock() + defer rep.readOnlyCmdMu.Unlock() + s.mu.Lock() + defer s.mu.Unlock() rep.mu.Lock() + defer rep.mu.Unlock() if opts.DestroyData { // Detect if we were already removed. if rep.mu.destroyStatus.Removed() { - rep.mu.Unlock() - rep.readOnlyCmdMu.Unlock() return nil, nil // already removed, noop } } else { // If the caller doesn't want to destroy the data because it already // has done so, then it must have already also set the destroyStatus. if !rep.mu.destroyStatus.Removed() { - rep.mu.Unlock() - rep.readOnlyCmdMu.Unlock() - log.Fatalf(ctx, "replica not marked as destroyed but data already destroyed: %v", rep) + return nil, errors.AssertionFailedf("replica not marked as destroyed but data already destroyed: %v", rep) } } - desc = rep.mu.state.Desc - if repDesc, ok := desc.GetReplicaDescriptor(s.StoreID()); ok && repDesc.ReplicaID >= nextReplicaID { - rep.mu.Unlock() - rep.readOnlyCmdMu.Unlock() - // NB: This should not in any way be possible starting in 20.1. - log.Fatalf(ctx, "replica descriptor's ID has changed (%s >= %s)", - repDesc.ReplicaID, nextReplicaID) + // Check if Replica is in the Store. + // + // There is a certain amount of idempotency in this method (repeat attempts + // to destroy an already destroyed replica are allowed when DestroyData is + // set, see the early return above), but if we get here then the Replica + // better be in the Store (since the Store enforces ownership over the + // keyspace, so deleting data for a Replica that's not in the Store can + // delete random active data). Note that if the caller has !DestroyData, + // this means it needs to already know that the Replica is still in the + // Store. + if existing, ok := s.mu.replicasByRangeID.Load(rep.RangeID); !ok { + return nil, errors.AssertionFailedf("cannot remove replica which does not exist in Store") + } else if existing != rep { + return nil, errors.AssertionFailedf("replica %v replaced by %v before being removed", + rep, existing) } - // This is a fatal error as an initialized replica can never become - // uninitialized. - if !rep.IsInitialized() { - rep.mu.Unlock() - rep.readOnlyCmdMu.Unlock() - log.Fatalf(ctx, "uninitialized replica cannot be removed with removeInitializedReplica: %v", rep) + // Now we know that the Store's Replica is identical to the passed-in + // Replica. + desc := rep.mu.state.Desc + if repDesc, ok := desc.GetReplicaDescriptor(s.StoreID()); ok && repDesc.ReplicaID >= nextReplicaID { + // The ReplicaID of a Replica is immutable. + return nil, errors.AssertionFailedf("replica descriptor's ID has changed (%s >= %s)", + repDesc.ReplicaID, nextReplicaID) } - // Mark the replica as removed before deleting data. + // Sanity checks passed. Mark the replica as removed before deleting data. rep.mu.destroyStatus.Set(kvpb.NewRangeNotFoundError(rep.RangeID, rep.StoreID()), destroyReasonRemoved) - rep.mu.Unlock() - rep.readOnlyCmdMu.Unlock() - } - // Proceed with the removal, all errors encountered from here down are fatal. - - // Another sanity check that this replica is a part of this store. - existing, err := s.GetReplica(rep.RangeID) + return desc, nil + }() if err != nil { - log.Fatalf(ctx, "cannot remove replica which does not exist: %v", err) - } else if existing != rep { - log.Fatalf(ctx, "replica %v replaced by %v before being removed", - rep, existing) + return nil, err + } + if desc == nil { + // Already removed/removing, no-op. + return nil, nil } + // Proceed with the removal, all errors encountered from here down are fatal. + // During merges, the context might have the subsuming range, so we explicitly // log the replica to be removed. log.Infof(ctx, "removing replica r%d/%d", rep.RangeID, rep.replicaID) diff --git a/pkg/kv/kvserver/store_test.go b/pkg/kv/kvserver/store_test.go index c5e2f1e373d5..cd1d09d8283e 100644 --- a/pkg/kv/kvserver/store_test.go +++ b/pkg/kv/kvserver/store_test.go @@ -740,11 +740,27 @@ func TestStoreRemoveReplicaDestroy(t *testing.T) { if err != nil { t.Fatal(err) } - if err := store.RemoveReplica(ctx, repl1, repl1.Desc().NextReplicaID, RemoveOptions{ - DestroyData: true, - }); err != nil { - t.Fatal(err) - } + + // Can't remove Replica with DestroyData false because this requires the destroyStatus + // to already have been set by the caller (but we didn't). + require.ErrorContains(t, store.RemoveReplica(ctx, repl1, repl1.Desc().NextReplicaID, RemoveOptions{ + DestroyData: false, + }), `replica not marked as destroyed`) + + // Remove the Replica twice, as this should be idempotent. + // NB: we rely on this idempotency today (as @tbg found out when he accidentally + // removed it). + for i := 0; i < 2; i++ { + require.NoError(t, store.RemoveReplica(ctx, repl1, repl1.Desc().NextReplicaID, RemoveOptions{ + DestroyData: true, + }), "%d", i) + } + + // However, if we have DestroyData=false, caller is expected to be the unique first "destroyer" + // of the Replica. + require.ErrorContains(t, store.RemoveReplica(ctx, repl1, repl1.Desc().NextReplicaID, RemoveOptions{ + DestroyData: false, + }), `does not exist`) // Verify that removal of a replica marks it as destroyed so that future raft // commands on the Replica will silently be dropped.