-
Notifications
You must be signed in to change notification settings - Fork 3.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
storage: aggregate iterator stats #99726
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -223,11 +223,11 @@ func BenchmarkMVCCGet_Pebble(b *testing.B) { | |
ctx := context.Background() | ||
for _, batch := range []bool{false, true} { | ||
b.Run(fmt.Sprintf("batch=%t", batch), func(b *testing.B) { | ||
for _, numVersions := range []int{1, 10, 100} { | ||
for _, numVersions := range []int{10} { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @jbowens did you intend to merge this change to the benchmark? |
||
b.Run(fmt.Sprintf("versions=%d", numVersions), func(b *testing.B) { | ||
for _, valueSize := range []int{8} { | ||
b.Run(fmt.Sprintf("valueSize=%d", valueSize), func(b *testing.B) { | ||
for _, numRangeKeys := range []int{0, 1, 100} { | ||
for _, numRangeKeys := range []int{0} { | ||
b.Run(fmt.Sprintf("numRangeKeys=%d", numRangeKeys), func(b *testing.B) { | ||
runMVCCGet(ctx, b, mvccBenchData{ | ||
numVersions: numVersions, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -361,9 +361,9 @@ type EngineIterator interface { | |
// Value returns the current value as a byte slice. | ||
// REQUIRES: latest positioning function returned valid=true. | ||
Value() ([]byte, error) | ||
// GetRawIter is a low-level method only for use in the storage package, | ||
// that returns the underlying pebble Iterator. | ||
GetRawIter() pebbleiter.Iterator | ||
// CloneContext is a low-level method only for use in the storage package, | ||
// that provides sufficient context that the iterator may be cloned. | ||
CloneContext() CloneContext | ||
// SeekEngineKeyGEWithLimit is similar to SeekEngineKeyGE, but takes an | ||
// additional exclusive upper limit parameter. The limit is semantically | ||
// best-effort, and is an optimization to avoid O(n^2) iteration behavior in | ||
|
@@ -388,6 +388,13 @@ type EngineIterator interface { | |
Stats() IteratorStats | ||
} | ||
|
||
// CloneContext is an opaque type encapsulating sufficient context to construct | ||
// a clone of an existing iterator. | ||
type CloneContext struct { | ||
rawIter pebbleiter.Iterator | ||
statsReporter statsReporter | ||
} | ||
|
||
// IterOptions contains options used to create an {MVCC,Engine}Iterator. | ||
// | ||
// For performance, every {MVCC,Engine}Iterator must specify either Prefix or | ||
|
@@ -462,6 +469,7 @@ type IterOptions struct { | |
// Range keys themselves are not affected by the masking, and will be | ||
// emitted as normal. | ||
RangeKeyMaskingBelow hlc.Timestamp | ||
|
||
// useL6Filters allows the caller to opt into reading filter blocks for | ||
// L6 sstables. Only for use with Prefix = true. Helpful if a lot of prefix | ||
// Seeks are expected in quick succession, that are also likely to not | ||
|
@@ -1013,15 +1021,7 @@ type WriteBatch interface { | |
// *pebble.Metrics struct, which has its own documentation. | ||
type Metrics struct { | ||
*pebble.Metrics | ||
// WriteStallCount counts the number of times Pebble intentionally delayed | ||
// incoming writes. Currently, the only two reasons for this to happen are: | ||
// - "memtable count limit reached" | ||
// - "L0 file count limit exceeded" | ||
// | ||
// We do not split this metric across these two reasons, but they can be | ||
// distinguished in the pebble logs. | ||
WriteStallCount int64 | ||
WriteStallDuration time.Duration | ||
Iterator AggregatedIteratorStats | ||
// DiskSlowCount counts the number of times Pebble records disk slowness. | ||
DiskSlowCount int64 | ||
// DiskStallCount counts the number of times Pebble observes slow writes | ||
|
@@ -1031,6 +1031,56 @@ type Metrics struct { | |
SharedStorageWriteBytes int64 | ||
// SharedStorageReadBytes counts the number of bytes read from shared storage. | ||
SharedStorageReadBytes int64 | ||
// WriteStallCount counts the number of times Pebble intentionally delayed | ||
// incoming writes. Currently, the only two reasons for this to happen are: | ||
// - "memtable count limit reached" | ||
// - "L0 file count limit exceeded" | ||
// | ||
// We do not split this metric across these two reasons, but they can be | ||
// distinguished in the pebble logs. | ||
WriteStallCount int64 | ||
WriteStallDuration time.Duration | ||
} | ||
|
||
// AggregatedIteratorStats holds cumulative stats, collected and summed over all | ||
// of an engine's iterators. | ||
type AggregatedIteratorStats struct { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It would be helpful to reference this type in the help text for each of the metrics as a pointer for more detailed semantics of what's tracked within each. |
||
// BlockBytes holds the sum of sizes of all loaded blocks. If the block was | ||
// compressed, this is the compressed bytes. This value includes blocks that | ||
// were loaded from the cache, and bytes that needed to be read from | ||
// persistent storage. | ||
// | ||
// Currently, there may be some gaps in coverage. (At the time of writing, | ||
// 2nd-level index blocks are excluded.) | ||
BlockBytes uint64 | ||
// BlockBytesInCache holds the subset of BlockBytes that were already in the | ||
// block cache, requiring no I/O. | ||
BlockBytesInCache uint64 | ||
// BlockReadDuration accumulates the duration spent fetching blocks due to | ||
// block cache misses. | ||
// | ||
// Currently, there may be some gaps in coverage. (At the time of writing, | ||
// range deletion and range key blocks, meta index blocks and properties | ||
// blocks are all excluded.) | ||
BlockReadDuration time.Duration | ||
// ExternalSeeks is the total count of seeks in forward and backward | ||
// directions performed on pebble.Iterators. | ||
ExternalSeeks int | ||
// ExternalSteps is the total count of relative positioning operations (eg, | ||
// Nexts, Prevs, NextPrefix, NextWithLimit, etc) in forward and backward | ||
// directions performed on pebble.Iterators. | ||
ExternalSteps int | ||
// InternalSeeks is the total count of seeks in forward and backward | ||
// directions performed on Pebble's internal iterator. If this is high | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. "If this is high..." Could you copy this to the metric help text? |
||
// relative to ExternalSeeks, it's a good indication that there's an | ||
// accumulation of garbage within the LSM (NOT MVCC garbage). | ||
InternalSeeks int | ||
// InternalSteps is the total count of relative positioning operations (eg, | ||
// Nexts, Prevs, NextPrefix, etc) in forward and backward directions | ||
// performed on pebble's internal iterator. If this is high relative to | ||
// ExternalSteps, it's a good indication that there's an accumulation of | ||
// garbage within the LSM (NOT MVCC garbage). | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ditto about "If this is high..." |
||
InternalSteps int | ||
} | ||
|
||
// MetricsForInterval is a set of pebble.Metrics that need to be saved in order to | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I was expecting them all to be counters. They are all gauges. What's going on?