From 481da04154d6b66972944e45127eeffed8384e7f Mon Sep 17 00:00:00 2001 From: sumeerbhola Date: Wed, 1 Nov 2023 11:56:41 -0400 Subject: [PATCH] db: add KeyStatistics.LatestKindsCount Also give one example of how to interpret these statistics. And clarify that SnapshotPinned* are not counting what they are claiming to count. --- db.go | 43 +++++++++++++++++++++++++++++++++++----- scan_internal_test.go | 6 ++++++ testdata/scan_statistics | 12 +++++++++++ 3 files changed, 56 insertions(+), 5 deletions(-) diff --git a/db.go b/db.go index 5a7e28b76d..5ac5d7585c 100644 --- a/db.go +++ b/db.go @@ -2794,14 +2794,40 @@ func (d *DB) SetCreatorID(creatorID uint64) error { // KeyStatistics keeps track of the number of keys that have been pinned by a // snapshot as well as counts of the different key kinds in the lsm. +// +// One way of using the accumulated stats, when we only have sets and dels, +// and say the counts are represented as del_count, set_count, +// del_latest_count, set_latest_count, snapshot_pinned_count: +// +// - del_latest_count + set_latest_count is the number of unique user keys +// (unique). +// +// - set_latest_count is the number of live unique user keys (live_unique). +// +// - Garbage is del_count + set_count - live_unique. +// +// - If everything were in the LSM, del_count+set_count-snapshot_pinned_count +// would also be the number of unique user keys (note that +// snapshot_pinned_count is counting something different -- see comment below). +// But snapshot_pinned_count only counts keys in the LSM so the excess here +// must be keys in memtables. type KeyStatistics struct { - // when a compaction determines a key is obsolete, but cannot elide the key - // because it's required by an open snapshot. + // TODO(sumeer): the SnapshotPinned* are incorrect in that these older + // versions can be in a different level. Either fix the accounting or + // rename these fields. 
+ + // SnapshotPinnedKeys represents obsolete keys that cannot be elided during + // a compaction, because they are required by an open snapshot. SnapshotPinnedKeys int - // the total number of bytes of all snapshot pinned keys. + // SnapshotPinnedKeysBytes is the total number of bytes of all snapshot + // pinned keys. SnapshotPinnedKeysBytes uint64 - // Note: these fields are currently only populated for point keys (including range deletes). + // KindsCount is the count for each kind of key. It includes point keys, + // range deletes and range keys. KindsCount [InternalKeyKindMax + 1]int + // LatestKindsCount is the count for each kind of key when it is the latest + // kind for a user key. It is only populated for point keys. + LatestKindsCount [InternalKeyKindMax + 1]int } // LSMKeyStatistics is used by DB.ScanStatistics. @@ -2846,7 +2872,8 @@ func (d *DB) ScanStatistics( // pinned by a snapshot. size := uint64(key.Size()) kind := key.Kind() - if iterInfo.Kind == IteratorLevelLSM && d.equal(prevKey.UserKey, key.UserKey) { + sameKey := d.equal(prevKey.UserKey, key.UserKey) + if iterInfo.Kind == IteratorLevelLSM && sameKey { stats.Levels[iterInfo.Level].SnapshotPinnedKeys++ stats.Levels[iterInfo.Level].SnapshotPinnedKeysBytes += size stats.Accumulated.SnapshotPinnedKeys++ @@ -2855,6 +2882,12 @@ func (d *DB) ScanStatistics( if iterInfo.Kind == IteratorLevelLSM { stats.Levels[iterInfo.Level].KindsCount[kind]++ } + if !sameKey { + if iterInfo.Kind == IteratorLevelLSM { + stats.Levels[iterInfo.Level].LatestKindsCount[kind]++ + } + stats.Accumulated.LatestKindsCount[kind]++ + } stats.Accumulated.KindsCount[kind]++ prevKey.CopyFrom(*key) diff --git a/scan_internal_test.go b/scan_internal_test.go index 5d0295e5fa..6a43fc6208 100644 --- a/scan_internal_test.go +++ b/scan_internal_test.go @@ -182,6 +182,9 @@ func TestScanStatistics(t *testing.T) { } for _, kind := range keyKindsToDisplay { fmt.Fprintf(&b, " %s key count: %d\n", kind.String(), 
stats.Levels[lvl].KindsCount[kind]) + if stats.Levels[lvl].LatestKindsCount[kind] > 0 { + fmt.Fprintf(&b, " %s latest count: %d\n", kind.String(), stats.Levels[lvl].LatestKindsCount[kind]) + } } } @@ -191,6 +194,9 @@ func TestScanStatistics(t *testing.T) { } for _, kind := range keyKindsToDisplay { fmt.Fprintf(&b, " %s key count: %d\n", kind.String(), stats.Accumulated.KindsCount[kind]) + if stats.Accumulated.LatestKindsCount[kind] > 0 { + fmt.Fprintf(&b, " %s latest count: %d\n", kind.String(), stats.Accumulated.LatestKindsCount[kind]) + } } return b.String() default: diff --git a/testdata/scan_statistics b/testdata/scan_statistics index 79620147dd..a080099644 100644 --- a/testdata/scan_statistics +++ b/testdata/scan_statistics @@ -12,6 +12,7 @@ scan-statistics lower=b upper=f keys=(SET) ---- Aggregate: SET key count: 2 + SET latest count: 2 flush ---- @@ -20,15 +21,19 @@ scan-statistics lower=b upper=e keys=(SET) levels=(0) ---- Level 0: SET key count: 1 + SET latest count: 1 Aggregate: SET key count: 1 + SET latest count: 1 scan-statistics lower=b upper=f keys=(SET) levels=(0) ---- Level 0: SET key count: 2 + SET latest count: 2 Aggregate: SET key count: 2 + SET latest count: 2 scan-statistics lower=f upper=l keys=(SET) ---- @@ -49,9 +54,11 @@ scan-statistics lower=b upper=f keys=(SET, DEL) levels=(0) Level 0: SET key count: 2 DEL key count: 2 + DEL latest count: 2 Aggregate: SET key count: 2 DEL key count: 2 + DEL latest count: 2 reset ---- @@ -81,8 +88,10 @@ scan-statistics lower=b upper=f keys=(SET) levels=(6) ---- Level 6: SET key count: 1 + SET latest count: 1 Aggregate: SET key count: 1 + SET latest count: 1 batch commit set c a @@ -96,10 +105,13 @@ scan-statistics lower=b upper=f keys=(SET) levels=(0, 6) ---- Level 0: SET key count: 1 + SET latest count: 1 Level 6: SET key count: 1 + SET latest count: 1 Aggregate: SET key count: 2 + SET latest count: 2 reset ----