Skip to content

Commit

Permalink
storage: don't write nonzero sticky bit before 19.2
Browse files Browse the repository at this point in the history
Before 19.2, callers to AdminSplit are only ever supposed to pass
`Expiration==hlc.Timestamp{}` as this triggers the legacy code path
necessary while nodes in the cluster may not know desc.StickyBit yet. To
avoid CPut problems when those nodes update the range descriptor, we
must not persist non-nil values on it in that case. It looked like we
would sometimes still persist a &zero, which could cause problems.

The bigger problem though was that there were also two callers that
straight-up didn't check the cluster version and passed nonzero values
into AdminSplit. These callers were added recently and I can't blame
anyone there; it is impossible to know that one argument needs to be
zero before 19.2.

Instead of trying to fix this invariant (which wasn't trivial in this
case) just ignore expiration times when the coordinator doesn't think
19.2 is already active. This could lead to sticky bits being ignored
right around the cluster transition, but that seems much better than
risking old nodes not being able to carry out any changes to the
descriptors any more (which is the consequence of writing non-nil
sticky bits before this is safe).

Release note (bug fix): Fix a cluster migration bug that could occur
while running in a mixed 19.1/19.2 cluster. The symptom would be
messages of the form:

```
X at key /Table/54 failed: unexpected value: ...
```

Affected clusters should be updated to 19.2 or, if 19.1 is desired,
recreated from a backup.
  • Loading branch information
tbg authored and nvanbenschoten committed Sep 9, 2019
1 parent c3b82b0 commit a533375
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 8 deletions.
6 changes: 5 additions & 1 deletion pkg/storage/batcheval/cmd_end_transaction.go
Original file line number Diff line number Diff line change
Expand Up @@ -644,7 +644,11 @@ func RunCommitTrigger(
}
if sbt := ct.GetStickyBitTrigger(); sbt != nil {
newDesc := *rec.Desc()
newDesc.StickyBit = &sbt.StickyBit
if sbt.StickyBit != (hlc.Timestamp{}) {
newDesc.StickyBit = &sbt.StickyBit
} else {
newDesc.StickyBit = nil
}
var res result.Result
res.Replicated.State = &storagepb.ReplicaState{
Desc: &newDesc,
Expand Down
6 changes: 6 additions & 0 deletions pkg/storage/bulk/sst_batcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,9 @@ func (b *SSTBatcher) doFlush(ctx context.Context, reason int, nextKey roachpb.Ke
if splitAt, err := keys.EnsureSafeSplitKey(start); err != nil {
log.Warning(ctx, err)
} else {
// NB: Passing 'hour' here is technically illegal until 19.2 is
// active, but the value will be ignored before that, and we don't
// have access to the cluster version here.
if err := b.db.SplitAndScatter(ctx, splitAt, hour); err != nil {
log.Warning(ctx, err)
}
Expand Down Expand Up @@ -289,6 +292,9 @@ func (b *SSTBatcher) doFlush(ctx context.Context, reason int, nextKey roachpb.Ke
log.Warning(ctx, err)
} else {
log.VEventf(ctx, 2, "%s added since last split, splitting/scattering for next range at %v", sz(b.flushedToCurrentRange), end)
// NB: Passing 'hour' here is technically illegal until 19.2 is
// active, but the value will be ignored before that, and we don't
// have access to the cluster version here.
if err := b.db.SplitAndScatter(ctx, splitAt, hour); err != nil {
log.Warningf(ctx, "failed to split and scatter during ingest: %+v", err)
}
Expand Down
31 changes: 24 additions & 7 deletions pkg/storage/replica_command.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ func maybeDescriptorChangedError(desc *roachpb.RangeDescriptor, err error) (stri
return fmt.Sprintf("descriptor changed: expected %s != [actual] nil (range subsumed)", desc), true
} else if err := detail.ActualValue.GetProto(&actualDesc); err == nil &&
desc.RangeID == actualDesc.RangeID && !desc.Equal(actualDesc) {
return fmt.Sprintf("descriptor changed: [expected] %s != [actual] %s", desc, &actualDesc), true
return fmt.Sprintf("descriptor changed: [expected] %+#v != [actual] %+#v", desc, &actualDesc), true
}
}
return "", false
Expand Down Expand Up @@ -140,6 +140,11 @@ func prepareSplitDescs(
rightDesc.Generation = leftDesc.Generation
maybeMarkGenerationComparable(st, rightDesc)

setStickyBit(rightDesc, expiration)
return leftDesc, rightDesc
}

func setStickyBit(desc *roachpb.RangeDescriptor, expiration hlc.Timestamp) {
// TODO(jeffreyxiao): Remove this check in 20.1.
// Note that the client API for splitting has expiration time as
// non-nullable, but the internal representation of a sticky bit is nullable
Expand All @@ -149,10 +154,8 @@ func prepareSplitDescs(
// setting it to hlc.Timestamp{}. This check ensures that CPuts would not
// fail on older versions.
if (expiration != hlc.Timestamp{}) {
rightDesc.StickyBit = &expiration
desc.StickyBit = &expiration
}

return leftDesc, rightDesc
}

func splitTxnAttempt(
Expand Down Expand Up @@ -248,7 +251,7 @@ func splitTxnStickyUpdateAttempt(
return err
}
newDesc := *desc
newDesc.StickyBit = &expiration
setStickyBit(&newDesc, expiration)

b := txn.NewBatch()
descKey := keys.RangeDescriptorKey(desc.StartKey)
Expand All @@ -264,7 +267,7 @@ func splitTxnStickyUpdateAttempt(
Commit: true,
InternalCommitTrigger: &roachpb.InternalCommitTrigger{
StickyBitTrigger: &roachpb.StickyBitTrigger{
StickyBit: expiration,
StickyBit: newDesc.GetStickyBit(),
},
},
})
Expand Down Expand Up @@ -296,6 +299,16 @@ func (r *Replica) adminSplitWithDescriptor(
delayable bool,
reason string,
) (roachpb.AdminSplitResponse, error) {
if !r.store.ClusterSettings().Version.IsActive(cluster.VersionStickyBit) {
// If sticky bits aren't supported yet but we receive one anyway, ignore
// it. The callers are supposed to only pass hlc.Timestamp{} in that
// case, but this is violated in at least one case (and there are lots of
// callers that aren't easy to audit and maintain audited). Take the
// small risk that the cluster version is actually active (but we don't
// know it yet) instead of risking broken descriptors.
args.ExpirationTime = hlc.Timestamp{}
}

var err error
// The split queue doesn't care about the set of replicas, so if we somehow
// are being handed one that's in a joint state, finalize that before
Expand Down Expand Up @@ -456,7 +469,11 @@ func (r *Replica) adminUnsplitWithDescriptor(
}

newDesc := *desc
newDesc.StickyBit = &hlc.Timestamp{}
// Use nil instead of &zero until 20.1; this field is new in 19.2. We
// could use &zero here because the sticky bit will never be populated
// before the cluster version reaches 19.2 and the early return above
// already handles that case, but nothing is won in doing so.
newDesc.StickyBit = nil
descKey := keys.RangeDescriptorKey(newDesc.StartKey)

b := txn.NewBatch()
Expand Down

0 comments on commit a533375

Please sign in to comment.