Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

kv: scan empty right-hand side of split for stats #78218

Merged
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
101 changes: 77 additions & 24 deletions pkg/kv/kvserver/batcheval/cmd_end_transaction.go
Original file line number Diff line number Diff line change
@@ -31,6 +31,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/storage"
"github.com/cockroachdb/cockroach/pkg/storage/enginepb"
"github.com/cockroachdb/cockroach/pkg/util"
"github.com/cockroachdb/cockroach/pkg/util/hlc"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/errors"
@@ -747,17 +748,22 @@ func RunCommitTrigger(
return result.Result{}, nil
}

// splitTrigger is called on a successful commit of a transaction
// containing an AdminSplit operation. It copies the AbortSpan for
// the new range and recomputes stats for both the existing, left hand
// side (LHS) range and the right hand side (RHS) range. For
// performance it only computes the stats for the original range (the
// left hand side) and infers the RHS stats by subtracting from the
// original stats. We compute the LHS stats because the split key
// computation ensures that we do not create large LHS
// ranges. However, this optimization is only possible if the stats
// are fully accurate. If they contain estimates, stats for both the
// LHS and RHS are computed.
// splitTrigger is called on a successful commit of a transaction containing an
// AdminSplit operation. It copies the AbortSpan for the new range and
// recomputes stats for both the existing, left hand side (LHS) range and the
// right hand side (RHS) range. For performance it only computes the stats for
// one side of the range and infers the stats for the other side by subtracting
// from the original stats. The choice of which side to scan is controlled by a
// heuristic. This choice defaults to scanning the LHS stats and inferring the
// RHS because the split key computation performed by the splitQueue ensures
// that we do not create large LHS ranges. However, if the RHS's global keyspace
// is entirely empty, it is scanned first instead. An example where we expect
// this heuristic to choose the RHS is bulk ingestion, which often splits off
// empty ranges and benefits from scanning the empty RHS when computing stats.
// Regardless of the choice of which side to scan first, the optimization to
// infer the other side's stats is only possible if the stats are fully accurate
// (ContainsEstimates = 0). If they contain estimates, stats for both the LHS
// and RHS are computed.
//
// Splits are complicated. A split is initiated when a replica receives an
// AdminSplit request. Note that this request (and other "admin" requests)
@@ -915,29 +921,72 @@ func splitTrigger(
split.RightDesc.StartKey, split.RightDesc.EndKey, desc)
}

// Compute the absolute stats for the (post-split) LHS. No more
// modifications to it are allowed after this line.

leftMS, err := rditer.ComputeStatsForRange(&split.LeftDesc, batch, ts.WallTime)
// Determine which side to scan first when computing the post-split stats. We
// scan the left-hand side first unless the right side's global keyspace is
// entirely empty. In cases where the range's stats do not already contain
// estimates, only one side needs to be scanned.
// TODO(nvanbenschoten): this is a simple heuristic. If we had a cheap way to
// determine the relative sizes of the LHS and RHS, we could be more
// sophisticated here and always choose to scan the cheaper side.
emptyRHS, err := isGlobalKeyspaceEmpty(batch, &split.RightDesc)
if err != nil {
return enginepb.MVCCStats{}, result.Result{}, errors.Wrap(err, "unable to compute stats for LHS range after split")
return enginepb.MVCCStats{}, result.Result{}, errors.Wrapf(err,
"unable to determine whether right hand side of split is empty")
}
log.Event(ctx, "computed stats for left hand side range")

h := splitStatsHelperInput{
AbsPreSplitBothEstimated: rec.GetMVCCStats(),
DeltaBatchEstimated: bothDeltaMS,
AbsPostSplitLeft: leftMS,
AbsPostSplitRightFn: func() (enginepb.MVCCStats, error) {
rightMS, err := rditer.ComputeStatsForRange(
&split.RightDesc, batch, ts.WallTime,
)
return rightMS, errors.Wrap(err, "unable to compute stats for RHS range after split")
},
AbsPostSplitLeftFn: makeScanStatsFn(ctx, batch, ts, &split.LeftDesc, "left hand side"),
AbsPostSplitRightFn: makeScanStatsFn(ctx, batch, ts, &split.RightDesc, "right hand side"),
ScanRightFirst: splitScansRightForStatsFirst || emptyRHS,
}
return splitTriggerHelper(ctx, rec, batch, h, split, ts)
}

// splitScansRightForStatsFirst controls whether the left hand side or the right
// hand side of the split is scanned first on the leaseholder when evaluating
// the split trigger. In practice, the splitQueue wants to scan the left hand
// side because the split key computation ensures that we do not create large
// LHS ranges. However, to improve test coverage, we use a metamorphic value.
var splitScansRightForStatsFirst = util.ConstantWithMetamorphicTestBool(
"split-scans-right-for-stats-first", false)

// isGlobalKeyspaceEmpty returns whether the global keyspace of the provided
// range is entirely empty. The function returns false if the global keyspace
// contains at least one key.
func isGlobalKeyspaceEmpty(reader storage.Reader, d *roachpb.RangeDescriptor) (bool, error) {
span := d.KeySpan().AsRawSpanWithNoLocals()
iter := reader.NewMVCCIterator(storage.MVCCKeyIterKind, storage.IterOptions{UpperBound: span.EndKey})
defer iter.Close()
iter.SeekGE(storage.MakeMVCCMetadataKey(span.Key))
ok, err := iter.Valid()
if err != nil {
return false, err
}
return !ok /* empty */, nil
}

// makeScanStatsFn constructs a splitStatsScanFn for the provided post-split
// range descriptor which computes the range's statistics.
func makeScanStatsFn(
ctx context.Context,
reader storage.Reader,
ts hlc.Timestamp,
sideDesc *roachpb.RangeDescriptor,
sideName string,
) splitStatsScanFn {
return func() (enginepb.MVCCStats, error) {
sideMS, err := rditer.ComputeStatsForRange(sideDesc, reader, ts.WallTime)
if err != nil {
return enginepb.MVCCStats{}, errors.Wrapf(err,
"unable to compute stats for %s range after split", sideName)
}
log.Eventf(ctx, "computed stats for %s range", sideName)
return sideMS, nil
}
}

// splitTriggerHelper continues the work begun by splitTrigger, but has a
// reduced scope that has all stats-related concerns bundled into a
// splitStatsHelper.
@@ -967,6 +1016,10 @@ func splitTriggerHelper(
return enginepb.MVCCStats{}, result.Result{}, errors.Wrap(err, "unable to copy last replica GC timestamp")
}

// Compute the absolute stats for the (post-split) ranges. No more
// modifications to the left hand side are allowed after this line and any
// modifications to the right hand side are accounted for by updating the
// helper's AbsPostSplitRight() reference.
h, err := makeSplitStatsHelper(statsInput)
if err != nil {
return enginepb.MVCCStats{}, result.Result{}, err
83 changes: 61 additions & 22 deletions pkg/kv/kvserver/batcheval/split_stats_helper.go
Original file line number Diff line number Diff line change
@@ -25,11 +25,12 @@ import "github.com/cockroachdb/cockroach/pkg/storage/enginepb"
// but nothing in this code relies on that). Since we have no reason to
// introduce ContainsEstimates in a split trigger, this typically has
// ContainsEstimates unset, but the results will be estimate free either way.
// - AbsPostSplitLeft: the stats of the range after applying the split, i.e.
// accounting both for the shrinking as well as for the writes in DeltaBatch
// related to the shrunk keyrange.
// In practice, we obtain this by recomputing the stats, and so we don't
// expect ContainsEstimates to be set in them.
// - AbsPostSplit{Left,Right}: the stats of either the left or right hand side
// range after applying the split, i.e. accounting both for the shrinking as
// well as for the writes in DeltaBatch related to the shrunk keyrange. In
// practice, we obtain this by recomputing the stats using the corresponding
// AbsPostSplit{Left,Right}Fn, and so we don't expect ContainsEstimates to be
// set in them. The choice of which side to scan is controlled by ScanRightFirst.
//
// We are interested in computing from this the quantities
//
@@ -103,48 +104,86 @@ import "github.com/cockroachdb/cockroach/pkg/storage/enginepb"
type splitStatsHelper struct {
in splitStatsHelperInput

absPostSplitLeft *enginepb.MVCCStats
absPostSplitRight *enginepb.MVCCStats
}

// splitStatsScanFn scans a post-split keyspace to compute its stats. The
// computed stats should not contain estimates.
type splitStatsScanFn func() (enginepb.MVCCStats, error)

// splitStatsHelperInput is passed to makeSplitStatsHelper.
type splitStatsHelperInput struct {
AbsPreSplitBothEstimated enginepb.MVCCStats
DeltaBatchEstimated enginepb.MVCCStats
AbsPostSplitLeft enginepb.MVCCStats
// AbsPostSplitLeftFn returns the stats for the left hand side of the
// split.
AbsPostSplitLeftFn splitStatsScanFn
// AbsPostSplitRightFn returns the stats for the right hand side of the
// split. This is only called (and only once) when either of the first two
// fields above contains estimates, so that we can guarantee that the
// post-splits stats don't.
AbsPostSplitRightFn func() (enginepb.MVCCStats, error)
// split.
AbsPostSplitRightFn splitStatsScanFn
// ScanRightFirst controls whether the left hand side or the right hand
// side of the split is scanned first. In cases where neither of the
// input stats contain estimates, this is the only side that needs to
// be scanned.
ScanRightFirst bool
}

// makeSplitStatsHelper initializes a splitStatsHelper. The values in the input
// are assumed to not change outside of the helper and must no longer be used.
// The provided AbsPostSplitRightFn recomputes the right hand side of the split
// after accounting for the split trigger batch. This is only invoked at most
// once, and only when necessary.
// The provided AbsPostSplitLeftFn and AbsPostSplitRightFn recompute the left
// and right hand sides of the split after accounting for the split trigger
// batch. Each are only invoked at most once, and only when necessary.
func makeSplitStatsHelper(input splitStatsHelperInput) (splitStatsHelper, error) {
h := splitStatsHelper{
in: input,
}

// Scan to compute the stats for the first side.
var absPostSplitFirst enginepb.MVCCStats
var err error
if h.in.ScanRightFirst {
absPostSplitFirst, err = input.AbsPostSplitRightFn()
h.absPostSplitRight = &absPostSplitFirst
} else {
absPostSplitFirst, err = input.AbsPostSplitLeftFn()
h.absPostSplitLeft = &absPostSplitFirst
}
if err != nil {
return splitStatsHelper{}, err
}

if h.in.AbsPreSplitBothEstimated.ContainsEstimates == 0 &&
h.in.DeltaBatchEstimated.ContainsEstimates == 0 {
// We have CombinedErrorDelta zero, so use arithmetic to compute
// AbsPostSplitRight().
// We have CombinedErrorDelta zero, so use arithmetic to compute the
// stats for the second side.
ms := h.in.AbsPreSplitBothEstimated
ms.Subtract(h.in.AbsPostSplitLeft)
ms.Subtract(absPostSplitFirst)
ms.Add(h.in.DeltaBatchEstimated)
h.absPostSplitRight = &ms
if h.in.ScanRightFirst {
h.absPostSplitLeft = &ms
} else {
h.absPostSplitRight = &ms
}
return h, nil
}
// Estimates are contained in the input, so ask the oracle for
// AbsPostSplitRight().
ms, err := input.AbsPostSplitRightFn()

// Estimates are contained in the input, so ask the oracle to scan to compute
// the stats for the second side. We only scan the second side when either of
// the input stats above (AbsPreSplitBothEstimated or DeltaBatchEstimated)
// contains estimates, so that we can guarantee that the post-splits stats
// don't.
var absPostSplitSecond enginepb.MVCCStats
if h.in.ScanRightFirst {
absPostSplitSecond, err = input.AbsPostSplitLeftFn()
h.absPostSplitLeft = &absPostSplitSecond
} else {
absPostSplitSecond, err = input.AbsPostSplitRightFn()
h.absPostSplitRight = &absPostSplitSecond
}
if err != nil {
return splitStatsHelper{}, err
}
h.absPostSplitRight = &ms
return h, nil
}

@@ -167,7 +206,7 @@ func (h splitStatsHelper) DeltaPostSplitLeft() enginepb.MVCCStats {
// NB: if we ever wanted to also write to the left hand side after init'ing
// the helper, we can make that work, too.
// NB: note how none of this depends on mutations to absPostSplitRight.
ms := h.in.AbsPostSplitLeft
ms := *h.absPostSplitLeft
ms.Subtract(h.in.AbsPreSplitBothEstimated)

return ms