forked from cockroachdb/pebble
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
sstable: reduce block cache memory fragmentation
Currently, the sstable writer contains heuristics to flush sstable blocks once the size reaches a specified threshold. In CRDB this is defined as 32KiB. However, when these blocks are loaded into memory, additional metadata is allocated, sometimes exceeding the 32KiB threshold. Since CRDB uses jemalloc, these allocations use a 40KiB size class, which leads to significant internal fragmentation. In addition, since the system is unaware of these size classes, we cannot design heuristics that prioritize reducing memory fragmentation. Reducing internal fragmentation can help reduce CRDB's memory footprint. This commit decrements the target block size to prevent internal fragmentation for small key-value pairs and adds support for optionally specifying size classes to enable a new set of heuristics that will reduce internal fragmentation for workloads with larger key-value pairs. Fixes: cockroachdb#999.
- Loading branch information
1 parent
c34894c
commit f874ba5
Showing
13 changed files
with
274 additions
and
67 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
// Copyright 2024 The LevelDB-Go and Pebble Authors. All rights reserved. Use | ||
// of this source code is governed by a BSD-style license that can be found in | ||
// the LICENSE file. | ||
|
||
//go:build ((!invariants && !tracing) || race) && cgo | ||
// +build !invariants,!tracing race | ||
// +build cgo | ||
|
||
package cache | ||
|
||
import ( | ||
"unsafe" | ||
|
||
"github.com/cockroachdb/pebble/internal/manual" | ||
) | ||
|
||
// ValueMetadataSize denotes the number of bytes of metadata allocated for a | ||
// cache entry. It is computed as the size of the Value struct, which newValue | ||
// places at the front of each allocation, ahead of the data buffer. | ||
const ValueMetadataSize = int(unsafe.Sizeof(Value{})) | ||
|
||
// newValue returns a Value backed by a single manual.New allocation of | ||
// ValueMetadataSize + n bytes, or nil when n is zero. The Value struct | ||
// occupies the start of the allocation and v.buf is the slice of the same | ||
// allocation past the first ValueMetadataSize bytes (presumably n bytes long; | ||
// this depends on manual.New returning a slice of the requested length). | ||
// The reference count is set up with v.ref.init(1). | ||
func newValue(n int) *Value { | ||
if n == 0 { | ||
return nil | ||
} | ||
|
||
// When we're not performing leak detection, the lifetime of the returned | ||
// Value is exactly the lifetime of the backing buffer and we can manually | ||
// allocate both. | ||
b := manual.New(ValueMetadataSize + n) | ||
// Reinterpret the first bytes of the allocation as the Value header. | ||
v := (*Value)(unsafe.Pointer(&b[0])) | ||
// Everything after the header is the data buffer exposed through v.buf. | ||
v.buf = b[ValueMetadataSize:] | ||
v.ref.init(1) | ||
return v | ||
} | ||
|
||
// free hands the memory backing v to manual.Free. It rebuilds a byte slice | ||
// that starts at v itself and spans ValueMetadataSize + cap(v.buf) bytes, | ||
// mirroring the layout produced by newValue in this file. | ||
// NOTE(review): since the freed slice includes v's own storage, v presumably | ||
// must not be used after this call - confirm against callers. | ||
func (v *Value) free() { | ||
// When we're not performing leak detection, the Value and buffer were | ||
// allocated contiguously. | ||
n := ValueMetadataSize + cap(v.buf) | ||
// View the memory beginning at v as a byte slice with length and capacity n. | ||
buf := (*[manual.MaxArrayLen]byte)(unsafe.Pointer(v))[:n:n] | ||
// Clear the interior slice before the memory is passed to manual.Free. | ||
v.buf = nil | ||
manual.Free(buf) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
# Block size exceeds target block size. | ||
build key-size=0 val-size=0 block-size=64 target-size=64 threshold=59 | ||
---- | ||
true | ||
|
||
# Block size does not exceed threshold size. | ||
build key-size=0 val-size=0 block-size=59 target-size=64 threshold=59 | ||
---- | ||
false | ||
|
||
# New block size exceeds the target size. | ||
build key-size=1 val-size=1 block-size=60 target-size=64 threshold=32 | ||
---- | ||
true | ||
|
||
# New block size does not exceed the target size. | ||
build key-size=1 val-size=1 block-size=40 target-size=64 threshold=32 | ||
---- | ||
false | ||
|
||
# New block size does not exceed the target size with hints enabled. | ||
build key-size=1 val-size=1 block-size=36 target-size=64 threshold=0 hints=8,16,32,64,128 | ||
---- | ||
false | ||
|
||
# New block size reduces internal fragmentation. | ||
build key-size=1 val-size=60 block-size=38 target-size=64 threshold=0 hints=8,16,32,64,128 | ||
---- | ||
false | ||
|
||
# New block size increases internal fragmentation. | ||
build key-size=1 val-size=40 block-size=38 target-size=64 threshold=0 hints=8,16,32,64,128 | ||
---- | ||
true | ||
|
||
# Block size target exceeded with hints enabled. | ||
build key-size=1 val-size=1 block-size=64 target-size=64 threshold=0 hints=8,16,32,64,128 | ||
---- | ||
true | ||
|
||
# Block size target exceeded, however, new block would reduce internal fragmentation. | ||
build key-size=1 val-size=1 block-size=70 target-size=64 threshold=0 hints=8,16,32,64,128 | ||
---- | ||
false | ||
|
||
# Fall back to heuristics with hints disabled when size class is limited. | ||
build key-size=1 val-size=1 block-size=59 target-size=64 threshold=59 hints=8,16,32 | ||
---- | ||
false | ||
|
||
# Flush when new size class could not be computed. | ||
build key-size=1 val-size=60 block-size=50 target-size=64 threshold=0 hints=8,16,32,64 | ||
---- | ||
true |
Oops, something went wrong.