Skip to content

Commit

Permalink
liveness: clarify IncrementEpoch race error and log as INFO
Browse files Browse the repository at this point in the history
Epic: none
Release note: None
  • Loading branch information
erikgrinaker committed Apr 17, 2023
1 parent c2975c7 commit c24edff
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 2 deletions.
23 changes: 22 additions & 1 deletion pkg/kv/kvserver/liveness/liveness.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,24 @@ var (
ErrEpochAlreadyIncremented = errors.New("epoch already incremented")
)

type ErrEpochCondFailed struct {
expected, actual livenesspb.Liveness
}

// SafeFormatError implements errors.SafeFormatter.
func (e *ErrEpochCondFailed) SafeFormatError(p errors.Printer) error {
p.Printf(
"liveness record changed while incrementing epoch for %+v; actual is %+v; is the node still live?",
redact.Safe(e.expected), redact.Safe(e.actual))
return nil
}

func (e *ErrEpochCondFailed) Format(s fmt.State, verb rune) { errors.FormatError(e, s, verb) }

func (e *ErrEpochCondFailed) Error() string {
return fmt.Sprint(e)
}

type errRetryLiveness struct {
error
}
Expand Down Expand Up @@ -1223,7 +1241,10 @@ func (nl *NodeLiveness) IncrementEpoch(ctx context.Context, liveness livenesspb.
} else if actual.Epoch < liveness.Epoch {
return errors.Errorf("unexpected liveness epoch %d; expected >= %d", actual.Epoch, liveness.Epoch)
}
return errors.Errorf("mismatch incrementing epoch for %+v; actual is %+v", liveness, actual)
return &ErrEpochCondFailed{
expected: liveness,
actual: actual.Liveness,
}
})
if err != nil {
return err
Expand Down
9 changes: 8 additions & 1 deletion pkg/kv/kvserver/replica_range_lease.go
Original file line number Diff line number Diff line change
Expand Up @@ -506,7 +506,14 @@ func (p *pendingLeaseRequest) requestLease(
// so we don't log it as an error.
//
// https://github.com/cockroachdb/cockroach/issues/35986
if !errors.Is(err, liveness.ErrEpochAlreadyIncremented) {
if errors.Is(err, liveness.ErrEpochAlreadyIncremented) {
// ignore
} else if errors.HasType(err, &liveness.ErrEpochCondFailed{}) {
// ErrEpochCondFailed indicates that someone else changed the liveness
// record while we were incrementing it. The node could still be
// alive, or someone else updated it. Don't log this as an error.
log.Infof(ctx, "failed to increment leaseholder's epoch: %s", err)
} else {
log.Errorf(ctx, "failed to increment leaseholder's epoch: %s", err)
}
}
Expand Down

0 comments on commit c24edff

Please sign in to comment.