Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

kvclient: fix rangefeed retry reason counters #133991

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/generated/metrics/metrics.html
Original file line number Diff line number Diff line change
Expand Up @@ -1012,6 +1012,7 @@
<tr><td>APPLICATION</td><td>distsender.rangefeed.retry.send</td><td>Number of ranges that encountered retryable send error</td><td>Ranges</td><td>COUNTER</td><td>COUNT</td><td>AVG</td><td>NON_NEGATIVE_DERIVATIVE</td></tr>
<tr><td>APPLICATION</td><td>distsender.rangefeed.retry.slow_consumer</td><td>Number of ranges that encountered retryable SLOW_CONSUMER error</td><td>Ranges</td><td>COUNTER</td><td>COUNT</td><td>AVG</td><td>NON_NEGATIVE_DERIVATIVE</td></tr>
<tr><td>APPLICATION</td><td>distsender.rangefeed.retry.store_not_found</td><td>Number of ranges that encountered retryable store not found error</td><td>Ranges</td><td>COUNTER</td><td>COUNT</td><td>AVG</td><td>NON_NEGATIVE_DERIVATIVE</td></tr>
<tr><td>APPLICATION</td><td>distsender.rangefeed.retry.unknown</td><td>Number of ranges that encountered retryable unknown error</td><td>Ranges</td><td>COUNTER</td><td>COUNT</td><td>AVG</td><td>NON_NEGATIVE_DERIVATIVE</td></tr>
<tr><td>APPLICATION</td><td>distsender.rangefeed.total_ranges</td><td>Number of ranges executing rangefeed<br/><br/>This counts the number of ranges with an active rangefeed.<br/></td><td>Ranges</td><td>GAUGE</td><td>COUNT</td><td>AVG</td><td>NONE</td></tr>
<tr><td>APPLICATION</td><td>distsender.rangelookups</td><td>Number of range lookups</td><td>Range Lookups</td><td>COUNTER</td><td>COUNT</td><td>AVG</td><td>NON_NEGATIVE_DERIVATIVE</td></tr>
<tr><td>APPLICATION</td><td>distsender.rpc.addsstable.sent</td><td>Number of AddSSTable requests processed.<br/><br/>This counts the requests in batches handed to DistSender, not the RPCs<br/>sent to individual Ranges as a result.</td><td>RPCs</td><td>COUNTER</td><td>COUNT</td><td>AVG</td><td>NON_NEGATIVE_DERIVATIVE</td></tr>
Expand Down
35 changes: 11 additions & 24 deletions pkg/kv/kvclient/kvcoord/dist_sender.go
Original file line number Diff line number Diff line change
Expand Up @@ -511,24 +511,25 @@ func makeDistSenderCircuitBreakerMetrics() DistSenderCircuitBreakerMetrics {
type rangeFeedErrorCounters struct {
RangefeedRestartRanges *metric.Counter
RangefeedErrorCatchup *metric.Counter
RetryErrors []*metric.Counter
RetryErrors map[int32]*metric.Counter
SendErrors *metric.Counter
StoreNotFound *metric.Counter
NodeNotFound *metric.Counter
RangeNotFound *metric.Counter
RangeKeyMismatch *metric.Counter
RetryUnknown *metric.Counter
}

func makeRangeFeedErrorCounters() rangeFeedErrorCounters {
var retryCounters []*metric.Counter
for name := range kvpb.RangeFeedRetryError_Reason_value {
retryCounters := make(map[int32]*metric.Counter, len(kvpb.RangeFeedRetryError_Reason_value))
for name, idx := range kvpb.RangeFeedRetryError_Reason_value {
name = strings.TrimPrefix(name, "REASON_")
retryCounters = append(retryCounters, metric.NewCounter(metric.Metadata{
retryCounters[idx] = metric.NewCounter(metric.Metadata{
Name: fmt.Sprintf("distsender.rangefeed.retry.%s", strings.ToLower(name)),
Help: fmt.Sprintf(`Number of ranges that encountered retryable %s error`, name),
Measurement: "Ranges",
Unit: metric.Unit_COUNT,
}))
})
}

retryMeta := func(name string) metric.Metadata {
Expand All @@ -549,6 +550,7 @@ func makeRangeFeedErrorCounters() rangeFeedErrorCounters {
NodeNotFound: metric.NewCounter(retryMeta("node not found")),
RangeNotFound: metric.NewCounter(retryMeta("range not found")),
RangeKeyMismatch: metric.NewCounter(retryMeta("range key mismatch")),
RetryUnknown: metric.NewCounter(retryMeta("unknown")),
}
}

Expand All @@ -558,25 +560,10 @@ func makeRangeFeedErrorCounters() rangeFeedErrorCounters {
func (c rangeFeedErrorCounters) GetRangeFeedRetryCounter(
reason kvpb.RangeFeedRetryError_Reason,
) *metric.Counter {
// Normally, retry reason values are contiguous. One way gaps could be
// introduced, is if some retry reasons are retired (deletions are
// accomplished by reserving enum value to prevent its re-use), and then more
// reason added after. Then, we can't use reason value as an index into
// retryCounters. Because this scenario is believed to be very unlikely, we
// forego any fancy re-mapping schemes, and instead opt for explicit handling.
switch reason {
case kvpb.RangeFeedRetryError_REASON_REPLICA_REMOVED,
kvpb.RangeFeedRetryError_REASON_RANGE_SPLIT,
kvpb.RangeFeedRetryError_REASON_MANUAL_RANGE_SPLIT,
kvpb.RangeFeedRetryError_REASON_RANGE_MERGED,
kvpb.RangeFeedRetryError_REASON_RAFT_SNAPSHOT,
kvpb.RangeFeedRetryError_REASON_LOGICAL_OPS_MISSING,
kvpb.RangeFeedRetryError_REASON_SLOW_CONSUMER,
kvpb.RangeFeedRetryError_REASON_NO_LEASEHOLDER,
kvpb.RangeFeedRetryError_REASON_RANGEFEED_CLOSED:
return c.RetryErrors[reason]
default:
panic(errors.AssertionFailedf("unknown retry reason %d", reason))
if metric, ok := c.RetryErrors[int32(reason)]; ok {
return metric
} else {
return c.RetryUnknown
}
}

Expand Down
20 changes: 13 additions & 7 deletions pkg/util/metric/registry.go
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,8 @@ func (r *Registry) AddMetricStruct(metricStruct interface{}) {
}
t := v.Type()

const allowNil = true
const disallowNil = false
for i := 0; i < v.NumField(); i++ {
vfield, tfield := v.Field(i), t.Field(i)
tname := tfield.Name
Expand All @@ -137,14 +139,18 @@ func (r *Registry) AddMetricStruct(metricStruct interface{}) {
for i := 0; i < vfield.Len(); i++ {
velem := vfield.Index(i)
telemName := fmt.Sprintf("%s[%d]", tname, i)
// Permit elements in the array to be nil.
const skipNil = true
r.addMetricValue(ctx, velem, telemName, skipNil, t)
r.addMetricValue(ctx, velem, telemName, allowNil, t)
}
case reflect.Map:
iter := vfield.MapRange()
for iter.Next() {
// telemName is only used for assertion errors.
telemName := iter.Key().String()
r.addMetricValue(ctx, iter.Value(), telemName, allowNil, t)
}
default:
// No metric fields should be nil.
const skipNil = false
r.addMetricValue(ctx, vfield, tname, skipNil, t)
r.addMetricValue(ctx, vfield, tname, disallowNil, t)
}
}
}
Expand Down Expand Up @@ -274,7 +280,7 @@ func checkFieldCanBeSkipped(
}

switch fieldType.Kind() {
case reflect.Array, reflect.Slice:
case reflect.Array, reflect.Slice, reflect.Map:
checkFieldCanBeSkipped(skipReason, fieldName, fieldType.Elem(), parentType)
case reflect.Struct:
containsMetrics := false
Expand Down Expand Up @@ -317,7 +323,7 @@ func containsMetricType(ft reflect.Type) bool {
}

switch ft.Kind() {
case reflect.Slice, reflect.Array:
case reflect.Slice, reflect.Array, reflect.Map:
return containsMetricType(ft.Elem())
case reflect.Struct:
for i := 0; i < ft.NumField(); i++ {
Expand Down
7 changes: 7 additions & 0 deletions pkg/util/metric/registry_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ func TestRegistry(t *testing.T) {
StructHistogram IHistogram
NestedStructGauge NestedStruct
ArrayStructCounters [4]*Counter
MapOfCounters map[int32]*Counter
// Ensure that nil struct values in arrays are safe.
NestedStructArray [2]*NestedStruct
// A few extra ones: either not exported, or not metric objects.
Expand Down Expand Up @@ -125,6 +126,10 @@ func TestRegistry(t *testing.T) {
nil, // skipped
NewCounter(Metadata{Name: "array.struct.counter.3"}),
},
MapOfCounters: map[int32]*Counter{
1: NewCounter(Metadata{Name: "map.counter.0"}),
2: NewCounter(Metadata{Name: "map.counter.1"}),
},
NestedStructArray: [2]*NestedStruct{
0: nil, // skipped
1: {
Expand Down Expand Up @@ -154,6 +159,8 @@ func TestRegistry(t *testing.T) {
"array.struct.counter.0": {},
"array.struct.counter.1": {},
"array.struct.counter.3": {},
"map.counter.0": {},
"map.counter.1": {},
"nested.struct.array.1.gauge": {},
}
totalMetrics := len(expNames)
Expand Down
Loading