Skip to content

Commit

Permalink
kvserver: tighten and de-fatal replicaGC checks
Browse files Browse the repository at this point in the history
Now we return most failed checks as errors and report errors to sentry (but
without crashing). `TestStoreRemoveReplicaDestroy` was updated with somewhat
better coverage of these error conditions, which is now

I also slightly rearranged the checks; it makes sense to me to check that the
store doesn't have a different Replica, rather than using a potentially old
Replica and failing the nextReplicaID check.

    old order                                     new order
    - assert replica is inited                    - assert replica is inited
    - assert opts are compatible with each other  - assert opts are compatible with each other
    - maybe return early                          - maybe return early
    - assert destroy status                       - assert destroy status
    - assert replica id < nextReplicaID ───┐  ┌─► - *assert same replica in store*
    - assert replica is inited (again)     └──┼─► - *assert replica id < nextReplicaID*
    - set destroy status                      │   - set destroy status
    - assert same replica in store────────────┘   - remainder
    - remainder

Closes #94813.

Epic: None
Release note: None
  • Loading branch information
tbg committed Jul 5, 2023
1 parent 6984579 commit 267099d
Show file tree
Hide file tree
Showing 3 changed files with 65 additions and 39 deletions.
4 changes: 4 additions & 0 deletions pkg/kv/kvserver/replica_gc_queue.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/spanconfig"
"github.com/cockroachdb/cockroach/pkg/util/hlc"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/log/logcrash"
"github.com/cockroachdb/cockroach/pkg/util/metric"
"github.com/cockroachdb/errors"
"go.etcd.io/raft/v3"
Expand Down Expand Up @@ -323,6 +324,9 @@ func (rgcq *replicaGCQueue) process(
if err := repl.store.RemoveReplica(ctx, repl, nextReplicaID, RemoveOptions{
DestroyData: true,
}); err != nil {
// Should never get an error from RemoveReplica.
const format = "error during replicaGC: %v"
logcrash.ReportOrPanic(ctx, &repl.store.ClusterSettings().SV, format, err)
return false, err
}
} else {
Expand Down
74 changes: 40 additions & 34 deletions pkg/kv/kvserver/store_remove_replica.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,64 +88,70 @@ func (s *Store) removeInitializedReplicaRaftMuLocked(

}

// Sanity checks before committing to the removal by setting the
// destroy status.
var desc *roachpb.RangeDescriptor
{
// Run sanity checks and on success commit to the removal by setting the
// destroy status. If (nil, nil) is returned, there's nothing to do.
desc, err := func() (*roachpb.RangeDescriptor, error) {
rep.readOnlyCmdMu.Lock()
defer rep.readOnlyCmdMu.Unlock()
s.mu.Lock()
defer s.mu.Unlock()
rep.mu.Lock()
defer rep.mu.Unlock()

if opts.DestroyData {
// Detect if we were already removed.
if rep.mu.destroyStatus.Removed() {
rep.mu.Unlock()
rep.readOnlyCmdMu.Unlock()
return nil, nil // already removed, noop
}
} else {
// If the caller doesn't want to destroy the data because it already
// has done so, then it must have already also set the destroyStatus.
if !rep.mu.destroyStatus.Removed() {
rep.mu.Unlock()
rep.readOnlyCmdMu.Unlock()
log.Fatalf(ctx, "replica not marked as destroyed but data already destroyed: %v", rep)
return nil, errors.AssertionFailedf("replica not marked as destroyed but data already destroyed: %v", rep)
}
}

desc = rep.mu.state.Desc
if repDesc, ok := desc.GetReplicaDescriptor(s.StoreID()); ok && repDesc.ReplicaID >= nextReplicaID {
rep.mu.Unlock()
rep.readOnlyCmdMu.Unlock()
// NB: This should not in any way be possible starting in 20.1.
log.Fatalf(ctx, "replica descriptor's ID has changed (%s >= %s)",
repDesc.ReplicaID, nextReplicaID)
// Check if Replica is in the Store.
//
// There is a certain amount of idempotency in this method (repeat attempts
// to destroy an already destroyed replica are allowed when DestroyData is
// set, see the early return above), but if we get here then the Replica
// better be in the Store (since the Store enforces ownership over the
// keyspace, so deleting data for a Replica that's not in the Store can
// delete random active data). Note that if the caller has !DestroyData,
// this means it needs to already know that the Replica is still in the
// Store.
if existing, ok := s.mu.replicasByRangeID.Load(rep.RangeID); !ok {
return nil, errors.AssertionFailedf("cannot remove replica which does not exist in Store")
} else if existing != rep {
return nil, errors.AssertionFailedf("replica %v replaced by %v before being removed",
rep, existing)
}

// This is a fatal error as an initialized replica can never become
// uninitialized.
if !rep.IsInitialized() {
rep.mu.Unlock()
rep.readOnlyCmdMu.Unlock()
log.Fatalf(ctx, "uninitialized replica cannot be removed with removeInitializedReplica: %v", rep)
// Now we know that the Store's Replica is identical to the passed-in
// Replica.
desc := rep.mu.state.Desc
if repDesc, ok := desc.GetReplicaDescriptor(s.StoreID()); ok && repDesc.ReplicaID >= nextReplicaID {
// The ReplicaID of a Replica is immutable.
return nil, errors.AssertionFailedf("replica descriptor's ID has changed (%s >= %s)",
repDesc.ReplicaID, nextReplicaID)
}

// Mark the replica as removed before deleting data.
// Sanity checks passed. Mark the replica as removed before deleting data.
rep.mu.destroyStatus.Set(kvpb.NewRangeNotFoundError(rep.RangeID, rep.StoreID()),
destroyReasonRemoved)
rep.mu.Unlock()
rep.readOnlyCmdMu.Unlock()
}
// Proceed with the removal, all errors encountered from here down are fatal.

// Another sanity check that this replica is a part of this store.
existing, err := s.GetReplica(rep.RangeID)
return desc, nil
}()
if err != nil {
log.Fatalf(ctx, "cannot remove replica which does not exist: %v", err)
} else if existing != rep {
log.Fatalf(ctx, "replica %v replaced by %v before being removed",
rep, existing)
return nil, err
}
if desc == nil {
// Already removed/removing, no-op.
return nil, nil
}

// Proceed with the removal, all errors encountered from here down are fatal.

// During merges, the context might have the subsuming range, so we explicitly
// log the replica to be removed.
log.Infof(ctx, "removing replica r%d/%d", rep.RangeID, rep.replicaID)
Expand Down
26 changes: 21 additions & 5 deletions pkg/kv/kvserver/store_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -740,11 +740,27 @@ func TestStoreRemoveReplicaDestroy(t *testing.T) {
if err != nil {
t.Fatal(err)
}
if err := store.RemoveReplica(ctx, repl1, repl1.Desc().NextReplicaID, RemoveOptions{
DestroyData: true,
}); err != nil {
t.Fatal(err)
}

// Can't remove Replica with DestroyData false because this requires the destroyStatus
// to already have been set by the caller (but we didn't).
require.ErrorContains(t, store.RemoveReplica(ctx, repl1, repl1.Desc().NextReplicaID, RemoveOptions{
DestroyData: false,
}), `replica not marked as destroyed`)

// Remove the Replica twice, as this should be idempotent.
// NB: we rely on this idempotency today (as @tbg found out when he accidentally
// removed it).
for i := 0; i < 2; i++ {
require.NoError(t, store.RemoveReplica(ctx, repl1, repl1.Desc().NextReplicaID, RemoveOptions{
DestroyData: true,
}), "%d", i)
}

// However, if we have DestroyData=false, caller is expected to be the unique first "destroyer"
// of the Replica.
require.ErrorContains(t, store.RemoveReplica(ctx, repl1, repl1.Desc().NextReplicaID, RemoveOptions{
DestroyData: false,
}), `does not exist`)

// Verify that removal of a replica marks it as destroyed so that future raft
// commands on the Replica will silently be dropped.
Expand Down

0 comments on commit 267099d

Please sign in to comment.