From 5e405aa8c459be6038ad31ab0bd06b900e2330f2 Mon Sep 17 00:00:00 2001 From: Oleg Afanasyev Date: Mon, 22 May 2023 16:25:10 +0100 Subject: [PATCH] kvserver/batcheval: fix mvcc stats computaion on split Previouly split trigger was using in-memory copy of mvccstats from replica. This value could diverge from store snapshot used by request in special cases of lease extensions that don't hold latches. This was causing stats for the replica to diverge. This PR changes base stats to be read from request reader which provides consistent stats value with content of splitted ranges. Touches: #101721 Release note: None --- .../kvserver/batcheval/cmd_end_transaction.go | 27 +++++++---- .../kvserver/batcheval/split_stats_helper.go | 46 +++++++++---------- 2 files changed, 41 insertions(+), 32 deletions(-) diff --git a/pkg/kv/kvserver/batcheval/cmd_end_transaction.go b/pkg/kv/kvserver/batcheval/cmd_end_transaction.go index bbffc398b4da..7dbc856793ed 100644 --- a/pkg/kv/kvserver/batcheval/cmd_end_transaction.go +++ b/pkg/kv/kvserver/batcheval/cmd_end_transaction.go @@ -1012,13 +1012,24 @@ func splitTrigger( "unable to compute range key stats delta for RHS") } + // Retrieve MVCC Stats from the current batch instead of using stats from + // execution context. Stats in the context could diverge from storage snapshot + // of current request when lease extensions are applied. We must not write + // absolute stats values for LHS based on this value, always produce a delta + // since underlying stats in storage could change. + currentStats, err := MakeStateLoader(rec).LoadMVCCStats(ctx, batch) + if err != nil { + return enginepb.MVCCStats{}, result.Result{}, errors.Wrap(err, + "unable to fetch original range mvcc stats for split") + } + h := splitStatsHelperInput{ - AbsPreSplitBothEstimated: rec.GetMVCCStats(), - DeltaBatchEstimated: bothDeltaMS, - DeltaRangeKey: rangeKeyDeltaMS, - AbsPostSplitLeftFn: makeScanStatsFn(ctx, batch, ts, &split.LeftDesc, "left hand side"), - AbsPostSplitRightFn: makeScanStatsFn(ctx, batch, ts, &split.RightDesc, "right hand side"), - ScanRightFirst: splitScansRightForStatsFirst || emptyRHS, + AbsPreSplitBothStored: currentStats, + DeltaBatchEstimated: bothDeltaMS, + DeltaRangeKey: rangeKeyDeltaMS, + PostSplitScanLeftFn: makeScanStatsFn(ctx, batch, ts, &split.LeftDesc, "left hand side"), + PostSplitScanRightFn: makeScanStatsFn(ctx, batch, ts, &split.RightDesc, "right hand side"), + ScanRightFirst: splitScansRightForStatsFirst || emptyRHS, } return splitTriggerHelper(ctx, rec, batch, h, split, ts) } @@ -1031,7 +1042,7 @@ func splitTrigger( var splitScansRightForStatsFirst = util.ConstantWithMetamorphicTestBool( "split-scans-right-for-stats-first", false) -// makeScanStatsFn constructs a splitStatsScanFn for the provided post-split +// makeScanStatsFn constructs a getStatsFn for the provided post-split // range descriptor which computes the range's statistics. func makeScanStatsFn( ctx context.Context, @@ -1039,7 +1050,7 @@ func makeScanStatsFn( ts hlc.Timestamp, sideDesc *roachpb.RangeDescriptor, sideName string, -) splitStatsScanFn { +) getStatsFn { return func() (enginepb.MVCCStats, error) { sideMS, err := rditer.ComputeStatsForRange(sideDesc, reader, ts.WallTime) if err != nil { diff --git a/pkg/kv/kvserver/batcheval/split_stats_helper.go b/pkg/kv/kvserver/batcheval/split_stats_helper.go index 51ffd3b06e7d..7fe1cb24bd49 100644 --- a/pkg/kv/kvserver/batcheval/split_stats_helper.go +++ b/pkg/kv/kvserver/batcheval/split_stats_helper.go @@ -16,7 +16,7 @@ import "github.com/cockroachdb/cockroach/pkg/storage/enginepb" // split. The quantities known during a split (i.e. while the split trigger // is evaluating) are // -// - AbsPreSplitBothEstimated: the stats of the range before the split trigger, +// - AbsPreSplitBothStored: the stats of the range before the split trigger, // i.e. without accounting for any writes in the batch. This can have // ContainsEstimates set. // - DeltaBatchEstimated: the writes in the batch, i.e. the stats delta accrued @@ -84,14 +84,14 @@ import "github.com/cockroachdb/cockroach/pkg/storage/enginepb" // These two equations are easily solved for the unknowns. First, we can express // DeltaPostSplitLeft() in known quantities via (2) as // -// DeltaPostSplitLeft() = AbsPostSplitLeft - AbsPreSplitBothEstimated. +// DeltaPostSplitLeft() = AbsPostSplitLeft - AbsPreSplitBothStored. // // Note that if we start out with estimates, DeltaPostSplitLeft() will wipe out // those estimates when added to the absolute stats. // // For AbsPostSplitRight(), there are two cases. First, due to the identity // -// CombinedErrorDelta = AbsPreSplitBothEstimated + DeltaBatchEstimated +// CombinedErrorDelta = AbsPreSplitBothStored + DeltaBatchEstimated // -(AbsPostSplitLeft + AbsPostSplitRight) // + DeltaRangeKey. // @@ -113,21 +113,19 @@ type splitStatsHelper struct { absPostSplitRight *enginepb.MVCCStats } -// splitStatsScanFn scans a post-split keyspace to compute its stats. The -// computed stats should not contain estimates. -type splitStatsScanFn func() (enginepb.MVCCStats, error) +type getStatsFn func() (enginepb.MVCCStats, error) // splitStatsHelperInput is passed to makeSplitStatsHelper. type splitStatsHelperInput struct { - AbsPreSplitBothEstimated enginepb.MVCCStats - DeltaBatchEstimated enginepb.MVCCStats - DeltaRangeKey enginepb.MVCCStats - // AbsPostSplitLeftFn returns the stats for the left hand side of the - // split. - AbsPostSplitLeftFn splitStatsScanFn - // AbsPostSplitRightFn returns the stats for the right hand side of the - // split. - AbsPostSplitRightFn splitStatsScanFn + AbsPreSplitBothStored enginepb.MVCCStats + DeltaBatchEstimated enginepb.MVCCStats + DeltaRangeKey enginepb.MVCCStats + // PostSplitScanLeftFn returns the stats for the left hand side of the + // split computed by scanning relevant part of the range. + PostSplitScanLeftFn getStatsFn + // PostSplitScanRightFn returns the stats for the right hand side of the + // split computed by scanning relevant part of the range. + PostSplitScanRightFn getStatsFn // ScanRightFirst controls whether the left hand side or the right hand // side of the split is scanned first. In cases where neither of the // input stats contain estimates, this is the only side that needs to @@ -137,7 +135,7 @@ type splitStatsHelperInput struct { // makeSplitStatsHelper initializes a splitStatsHelper. The values in the input // are assumed to not change outside of the helper and must no longer be used. -// The provided AbsPostSplitLeftFn and AbsPostSplitRightFn recompute the left +// The provided PostSplitScanLeftFn and PostSplitScanRightFn recompute the left // and right hand sides of the split after accounting for the split trigger // batch. Each are only invoked at most once, and only when necessary. func makeSplitStatsHelper(input splitStatsHelperInput) (splitStatsHelper, error) { @@ -149,21 +147,21 @@ func makeSplitStatsHelper(input splitStatsHelperInput) (splitStatsHelper, error) var absPostSplitFirst enginepb.MVCCStats var err error if h.in.ScanRightFirst { - absPostSplitFirst, err = input.AbsPostSplitRightFn() + absPostSplitFirst, err = input.PostSplitScanRightFn() h.absPostSplitRight = &absPostSplitFirst } else { - absPostSplitFirst, err = input.AbsPostSplitLeftFn() + absPostSplitFirst, err = input.PostSplitScanLeftFn() h.absPostSplitLeft = &absPostSplitFirst } if err != nil { return splitStatsHelper{}, err } - if h.in.AbsPreSplitBothEstimated.ContainsEstimates == 0 && + if h.in.AbsPreSplitBothStored.ContainsEstimates == 0 && h.in.DeltaBatchEstimated.ContainsEstimates == 0 { // We have CombinedErrorDelta zero, so use arithmetic to compute the // stats for the second side. - ms := h.in.AbsPreSplitBothEstimated + ms := h.in.AbsPreSplitBothStored ms.Subtract(absPostSplitFirst) ms.Add(h.in.DeltaBatchEstimated) ms.Add(h.in.DeltaRangeKey) @@ -177,15 +175,15 @@ func makeSplitStatsHelper(input splitStatsHelperInput) (splitStatsHelper, error) // Estimates are contained in the input, so ask the oracle to scan to compute // the stats for the second side. We only scan the second side when either of - // the input stats above (AbsPreSplitBothEstimated or DeltaBatchEstimated) + // the input stats above (AbsPreSplitBothStored or DeltaBatchEstimated) // contains estimates, so that we can guarantee that the post-splits stats // don't. var absPostSplitSecond enginepb.MVCCStats if h.in.ScanRightFirst { - absPostSplitSecond, err = input.AbsPostSplitLeftFn() + absPostSplitSecond, err = input.PostSplitScanLeftFn() h.absPostSplitLeft = &absPostSplitSecond } else { - absPostSplitSecond, err = input.AbsPostSplitRightFn() + absPostSplitSecond, err = input.PostSplitScanRightFn() h.absPostSplitRight = &absPostSplitSecond } if err != nil { @@ -214,7 +212,7 @@ func (h splitStatsHelper) DeltaPostSplitLeft() enginepb.MVCCStats { // the helper, we can make that work, too. // NB: note how none of this depends on mutations to absPostSplitRight. ms := *h.absPostSplitLeft - ms.Subtract(h.in.AbsPreSplitBothEstimated) + ms.Subtract(h.in.AbsPreSplitBothStored) return ms }