Skip to content

Commit

Permalink
Merge #25355
Browse files Browse the repository at this point in the history
25355: storage: introduce metrics for intent resolutions r=nvanbenschoten a=tschottdorf

In particular, this tracks poisoning aborts, which create abort span
entries (see #25233).

Some refactoring to make it easier to add such counters in the future
was carried out.

Release note: None

Co-authored-by: Tobias Schottdorf <[email protected]>
  • Loading branch information
craig[bot] and tbg committed Aug 16, 2018
2 parents 7d89a7d + 8f5cd02 commit 2d8938b
Show file tree
Hide file tree
Showing 11 changed files with 267 additions and 98 deletions.
12 changes: 6 additions & 6 deletions pkg/storage/batcheval/cmd_lease.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,11 @@ func declareKeysRequestLease(

func newFailedLeaseTrigger(isTransfer bool) result.Result {
var trigger result.Result
trigger.Local.LeaseMetricsResult = new(result.LeaseMetricsType)
trigger.Local.Metrics = new(result.Metrics)
if isTransfer {
*trigger.Local.LeaseMetricsResult = result.LeaseTransferError
trigger.Local.Metrics.LeaseTransferError = 1
} else {
*trigger.Local.LeaseMetricsResult = result.LeaseRequestError
trigger.Local.Metrics.LeaseRequestError = 1
}
return trigger
}
Expand Down Expand Up @@ -157,11 +157,11 @@ func evalNewLease(
// the merge aborts.)
pd.Local.MaybeWatchForMerge = true

pd.Local.LeaseMetricsResult = new(result.LeaseMetricsType)
pd.Local.Metrics = new(result.Metrics)
if isTransfer {
*pd.Local.LeaseMetricsResult = result.LeaseTransferSuccess
pd.Local.Metrics.LeaseTransferSuccess = 1
} else {
*pd.Local.LeaseMetricsResult = result.LeaseRequestSuccess
pd.Local.Metrics.LeaseRequestSuccess = 1
}
return pd, nil
}
24 changes: 22 additions & 2 deletions pkg/storage/batcheval/cmd_resolve_intent.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,20 @@ func declareKeysResolveIntent(
declareKeysResolveIntentCombined(desc, header, req, spans)
}

func resolveToMetricType(status roachpb.TransactionStatus, poison bool) *result.Metrics {
var typ result.Metrics
if WriteAbortSpanOnResolve(status) {
if poison {
typ.ResolvePoison = 1
} else {
typ.ResolveAbort = 1
}
} else {
typ.ResolveCommit = 1
}
return &typ
}

// ResolveIntent resolves a write intent from the specified key
// according to the status of the transaction which created it.
func ResolveIntent(
Expand All @@ -75,8 +89,14 @@ func ResolveIntent(
if err := engine.MVCCResolveWriteIntent(ctx, batch, ms, intent); err != nil {
return result.Result{}, err
}

var res result.Result
res.Local.Metrics = resolveToMetricType(args.Status, args.Poison)

if WriteAbortSpanOnResolve(args.Status) {
return result.Result{}, SetAbortSpan(ctx, cArgs.EvalCtx, batch, ms, args.IntentTxn, args.Poison)
if err := SetAbortSpan(ctx, cArgs.EvalCtx, batch, ms, args.IntentTxn, args.Poison); err != nil {
return result.Result{}, err
}
}
return result.Result{}, nil
return res, nil
}
10 changes: 8 additions & 2 deletions pkg/storage/batcheval/cmd_resolve_intent_range.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,8 +79,14 @@ func ResolveIntentRange(
reply.ResumeSpan = resumeSpan
reply.ResumeReason = roachpb.RESUME_KEY_LIMIT
}

var res result.Result
res.Local.Metrics = resolveToMetricType(args.Status, args.Poison)

if WriteAbortSpanOnResolve(args.Status) {
return result.Result{}, SetAbortSpan(ctx, cArgs.EvalCtx, batch, ms, args.IntentTxn, args.Poison)
if err := SetAbortSpan(ctx, cArgs.EvalCtx, batch, ms, args.IntentTxn, args.Poison); err != nil {
return result.Result{}, err
}
}
return result.Result{}, nil
return res, nil
}
27 changes: 0 additions & 27 deletions pkg/storage/batcheval/result/lease_metrics.go

This file was deleted.

38 changes: 38 additions & 0 deletions pkg/storage/batcheval/result/metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
// Copyright 2014 The Cockroach Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
// implied. See the License for the specific language governing
// permissions and limitations under the License.

package result

// Metrics tracks various counters related to command applications and
// their outcomes.
type Metrics struct {
LeaseRequestSuccess int // lease request evaluated successfully
LeaseRequestError int // lease request error at evaluation time
LeaseTransferSuccess int // lease transfer evaluated successfully
LeaseTransferError int // lease transfer error at evaluation time
ResolveCommit int // intent commit evaluated successfully
ResolveAbort int // non-poisoning intent abort evaluated successfully
ResolvePoison int // poisoning intent abort evaluated successfully
}

// Add absorbs the supplied Metrics into the receiver.
func (mt *Metrics) Add(o Metrics) {
mt.LeaseRequestSuccess += o.LeaseRequestSuccess
mt.LeaseRequestError += o.LeaseRequestError
mt.LeaseTransferSuccess += o.LeaseTransferSuccess
mt.LeaseTransferError += o.LeaseTransferError
mt.ResolveCommit += o.ResolveCommit
mt.ResolveAbort += o.ResolveAbort
mt.ResolvePoison += o.ResolvePoison
}
21 changes: 8 additions & 13 deletions pkg/storage/batcheval/result/result.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,14 +48,9 @@ type LocalResult struct {
// commit fails, or we may accidentally make uncommitted values
// live.
EndTxns *[]EndTxnIntents
// Whether we successfully or non-successfully requested a lease.
//
// TODO(tschottdorf): Update this counter correctly with prop-eval'ed KV
// in the following case:
// - proposal does not fail fast and goes through Raft
// - downstream-of-Raft logic identifies a conflict and returns an error
// The downstream-of-Raft logic does not exist at time of writing.
LeaseMetricsResult *LeaseMetricsType
// Metrics contains counters which are to be passed to the
// metrics subsystem.
Metrics *Metrics

// When set (in which case we better be the first range), call
// GossipFirstRange if the Replica holds the lease.
Expand Down Expand Up @@ -309,12 +304,12 @@ func (p *Result) MergeAndDestroy(q Result) error {
}
q.Local.EndTxns = nil

if p.Local.LeaseMetricsResult == nil {
p.Local.LeaseMetricsResult = q.Local.LeaseMetricsResult
} else if q.Local.LeaseMetricsResult != nil {
return errors.New("conflicting LeaseMetricsResult")
if p.Local.Metrics == nil {
p.Local.Metrics = q.Local.Metrics
} else if q.Local.Metrics != nil {
p.Local.Metrics.Add(*q.Local.Metrics)
}
q.Local.LeaseMetricsResult = nil
q.Local.Metrics = nil

if p.Local.MaybeGossipNodeLiveness == nil {
p.Local.MaybeGossipNodeLiveness = q.Local.MaybeGossipNodeLiveness
Expand Down
23 changes: 23 additions & 0 deletions pkg/storage/batcheval/result/result_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,3 +65,26 @@ func TestEvalResultIsZero(t *testing.T) {
}
}
}

func TestMergeAndDestroy(t *testing.T) {
var r0, r1, r2 Result
r1.Local.Metrics = new(Metrics)
r2.Local.Metrics = new(Metrics)

r1.Local.Metrics.LeaseRequestSuccess = 7

r2.Local.Metrics.ResolveAbort = 13
r2.Local.Metrics.LeaseRequestSuccess = 2

if err := r0.MergeAndDestroy(r1); err != nil {
t.Fatal(err)
}

if err := r0.MergeAndDestroy(r2); err != nil {
t.Fatal(err)
}

if f, exp := *r1.Local.Metrics, (Metrics{LeaseRequestSuccess: 9, ResolveAbort: 13}); f != exp {
t.Fatalf("expected %d, got %d", exp, f)
}
}
82 changes: 82 additions & 0 deletions pkg/storage/client_metrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,13 @@ import (
"github.com/pkg/errors"

"github.com/cockroachdb/cockroach/pkg/internal/client"
"github.com/cockroachdb/cockroach/pkg/keys"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/storage"
"github.com/cockroachdb/cockroach/pkg/storage/batcheval/result"
"github.com/cockroachdb/cockroach/pkg/storage/engine/enginepb"
"github.com/cockroachdb/cockroach/pkg/testutils"
"github.com/cockroachdb/cockroach/pkg/util/hlc"
"github.com/cockroachdb/cockroach/pkg/util/leaktest"
"github.com/cockroachdb/cockroach/pkg/util/metric"
)
Expand Down Expand Up @@ -152,6 +156,84 @@ func verifyRocksDBStats(t *testing.T, s *storage.Store) {
}
}

// TestStoreResolveMetrics verifies that metrics related to intent resolution
// are tracked properly.
func TestStoreResolveMetrics(t *testing.T) {
defer leaktest.AfterTest(t)()

// First prevent rot that would result from adding fields without handling
// them everywhere.
{
act := fmt.Sprintf("%+v", result.Metrics{})
exp := "{LeaseRequestSuccess:0 LeaseRequestError:0 LeaseTransferSuccess:0 LeaseTransferError:0 ResolveCommit:0 ResolveAbort:0 ResolvePoison:0}"
if act != exp {
t.Errorf("need to update this test due to added fields: %v", act)
}
}

mtc := &multiTestContext{}
defer mtc.Stop()
mtc.Start(t, 1)

ctx := context.Background()

span := roachpb.Span{Key: roachpb.Key("a"), EndKey: roachpb.Key("b")}

txn := roachpb.MakeTransaction("foo", span.Key, roachpb.MinUserPriority, enginepb.SERIALIZABLE, hlc.Timestamp{WallTime: 123}, 999)

const resolveCommitCount = int64(200)
const resolveAbortCount = int64(800)
const resolvePoisonCount = int64(2400)

var ba roachpb.BatchRequest
{
repl := mtc.stores[0].LookupReplica(keys.MustAddr(span.Key))
var err error
if ba.Replica, err = repl.GetReplicaDescriptor(); err != nil {
t.Fatal(err)
}
ba.RangeID = repl.RangeID
}

add := func(status roachpb.TransactionStatus, poison bool, n int64) {
for i := int64(0); i < n; i++ {
key := span.Key
endKey := span.EndKey
if i > n/2 {
req := &roachpb.ResolveIntentRangeRequest{
IntentTxn: txn.TxnMeta, Status: status, Poison: poison,
}
req.Key, req.EndKey = key, endKey
ba.Add(req)
continue
}
req := &roachpb.ResolveIntentRequest{
IntentTxn: txn.TxnMeta, Status: status, Poison: poison,
}
req.Key = key
ba.Add(req)
}
}

add(roachpb.COMMITTED, false, resolveCommitCount)
add(roachpb.ABORTED, false, resolveAbortCount)
add(roachpb.ABORTED, true, resolvePoisonCount)

if _, pErr := mtc.senders[0].Send(ctx, ba); pErr != nil {
t.Fatal(pErr)
}

if exp, act := resolveCommitCount, mtc.stores[0].Metrics().ResolveCommitCount.Count(); act < exp || act > exp+50 {
t.Errorf("expected around %d intent commits, saw %d", exp, act)
}
if exp, act := resolveAbortCount, mtc.stores[0].Metrics().ResolveAbortCount.Count(); act < exp || act > exp+50 {
t.Errorf("expected around %d intent aborts, saw %d", exp, act)
}
if exp, act := resolvePoisonCount, mtc.stores[0].Metrics().ResolvePoisonCount.Count(); act < exp || act > exp+50 {
t.Errorf("expected arounc %d abort span poisonings, saw %d", exp, act)
}
}

func TestStoreMetrics(t *testing.T) {
defer leaktest.AfterTest(t)()

Expand Down
Loading

0 comments on commit 2d8938b

Please sign in to comment.