Skip to content

Commit

Permalink
storage: recompute and adjust stats via consistency checker
Browse files Browse the repository at this point in the history
A number of bugs in our MVCCStats logic has been fixed recently (see for example
\cockroachdb#20996) and others are still present (cockroachdb#20554). For both, and also potentially for
future bugs or deliberate adjustments of the computations, we need a mechanism
to recompute the stats in order to purge incorrect numbers from the system over
time. Such a mechanism is introduced here.

It consists of two main components:

- A new RPC `AdjustStats`, which applies to a single Range and computes the difference
  between the persisted stats and the recomputed ones; it can "inject" a suitable delta
  into the stats (thus fixing the divergence) or do a "dry run".
- A trigger in the consistency checker that runs on the coordinating node (the lease
  holder). The consistency checker already recomputes the stats, and it can compare
  them against the persisted stats and judge whether there is a divergence. If there
  is one, naively one may hope to just insert the newly computed diff into the range,
  but this is not ideal due to concerns about double application and racing consistency
  checks. Instead, use `AdjustStats` (which, of course, was introduced for this purpose)
  which strikes a balance between efficiency and correctness.

Updates cockroachdb#20554.

Release note (general change): added a mechanism to recompute range stats on demand.
  • Loading branch information
tbg committed Jan 9, 2018
1 parent 8222e5d commit 633d8f5
Show file tree
Hide file tree
Showing 19 changed files with 1,903 additions and 862 deletions.
2 changes: 1 addition & 1 deletion pkg/cli/debug.go
Original file line number Diff line number Diff line change
Expand Up @@ -661,7 +661,7 @@ func runDebugCheckStoreDescriptors(ctx context.Context, db *engine.RocksDB) erro
if err != nil {
return false, err
}
ms, err := storage.ComputeStatsForRange(&desc, db, claimedMS.LastUpdateNanos)
ms, err := rditer.ComputeStatsForRange(&desc, db, claimedMS.LastUpdateNanos)
if err != nil {
return false, err
}
Expand Down
10 changes: 10 additions & 0 deletions pkg/roachpb/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -512,6 +512,9 @@ func (*AdminScatterRequest) Method() Method { return AdminScatter }
// Method implements the Request interface.
func (*AddSSTableRequest) Method() Method { return AddSSTable }

// Method implements the Request interface.
func (*AdjustStatsRequest) Method() Method { return AdjustStats }

// ShallowCopy implements the Request interface.
func (gr *GetRequest) ShallowCopy() Request {
shallowCopy := *gr
Expand Down Expand Up @@ -734,6 +737,12 @@ func (r *AddSSTableRequest) ShallowCopy() Request {
return &shallowCopy
}

// ShallowCopy implements the Request interface.
func (r *AdjustStatsRequest) ShallowCopy() Request {
shallowCopy := *r
return &shallowCopy
}

// NewGet returns a Request initialized to get the value at key.
func NewGet(key Key) Request {
return &GetRequest{
Expand Down Expand Up @@ -954,6 +963,7 @@ func (*TransferLeaseRequest) flags() int {
// lease holder.
return isWrite | isAlone | skipLeaseCheck
}
func (*AdjustStatsRequest) flags() int { return isWrite | isAlone }
func (*ComputeChecksumRequest) flags() int { return isWrite | isRange }
func (*DeprecatedVerifyChecksumRequest) flags() int { return isWrite }
func (*CheckConsistencyRequest) flags() int { return isAdmin | isRange }
Expand Down
2,168 changes: 1,352 additions & 816 deletions pkg/roachpb/api.pb.go

Large diffs are not rendered by default.

31 changes: 31 additions & 0 deletions pkg/roachpb/api.proto
Original file line number Diff line number Diff line change
Expand Up @@ -379,6 +379,35 @@ message CheckConsistencyResponse {
ResponseHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true];
}

// An AdjustStatsRequest triggers a stats recomputation on the Range addressed by
// the request.
//
// Since this request targets a specific Range, the start key must equal the
// start key of the target Range.
//
// The stats recomputation touches essentially the whole range, but the command
// avoids having to block other commands by taking care to not interleave
// with splits, and by using the commutativity of stats updates. As a result,
// it is safe to invoke at any time, including repeatedly, though it should be
// used conservatively due to performing a full scan of the Range.
message AdjustStatsRequest {
option (gogoproto.equal) = true;

Span header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true];
bytes computation_end_key = 2 [(gogoproto.casttype) = "Key"];
// When dry_run is true, the stats delta is computed, but no stats adjustment
// is performed.
bool dry_run = 3;
}

// An AdjustStatsResponse is the response to an AdjustStatsRequest.
message AdjustStatsResponse {
ResponseHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true];

// added_delta is the adjustment made to the range's stats, i.e. `new_stats = old_stats + added_delta`.
storage.engine.enginepb.MVCCNetworkStats added_delta = 2 [(gogoproto.nullable) = false];
}

// A BeginTransactionRequest is the argument to the BeginTransaction() method.
message BeginTransactionRequest {
option (gogoproto.equal) = true;
Expand Down Expand Up @@ -1150,6 +1179,7 @@ message RequestUnion {
QueryTxnRequest query_txn = 33;
AdminScatterRequest admin_scatter = 36;
AddSSTableRequest add_sstable = 37;
AdjustStatsRequest adjust_stats = 39;
}

// A ResponseUnion contains exactly one of the responses.
Expand Down Expand Up @@ -1200,6 +1230,7 @@ message ResponseUnion {
QueryTxnResponse query_txn = 33;
AdminScatterResponse admin_scatter = 36;
AddSSTableResponse add_sstable = 37;
AdjustStatsResponse adjust_stats = 39;
}

// A Header is attached to a BatchRequest, encapsulating routing and auxiliary
Expand Down
12 changes: 11 additions & 1 deletion pkg/roachpb/batch_generated.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions pkg/roachpb/method.go
Original file line number Diff line number Diff line change
Expand Up @@ -138,4 +138,6 @@ const (
AdminScatter
// AddSSTable links a file into the RocksDB log-structured merge-tree.
AddSSTable
// AdjustStats applies a delta to a Range's MVCCStats to fix computational errors.
AdjustStats
)
4 changes: 2 additions & 2 deletions pkg/roachpb/method_string.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

109 changes: 78 additions & 31 deletions pkg/storage/api.pb.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions pkg/storage/api.proto
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ option go_package = "storage";

import "roachpb/internal_raft.proto";
import "roachpb/metadata.proto";
import "storage/engine/enginepb/mvcc3.proto";
import "gogoproto/gogo.proto";

// StoreRequestHeader locates a Store on a Node.
Expand Down Expand Up @@ -49,6 +50,8 @@ message CollectChecksumResponse {
// TODO(tschottdorf): with larger ranges, this is no longer tenable.
// See https://github.com/cockroachdb/cockroach/issues/21128.
roachpb.RaftSnapshotData snapshot = 2;
// delta carries the stats of the range minus the recomputed stats.
storage.engine.enginepb.MVCCNetworkStats delta = 3 [(gogoproto.nullable) = false];
}

service Consistency {
Expand Down
Loading

0 comments on commit 633d8f5

Please sign in to comment.