Skip to content

Commit

Permalink
Improve memory alignment
Browse files Browse the repository at this point in the history
bbolt/db.go:38:9: struct of size 528 could be 496
bbolt/db.go:1012:12: struct with 40 pointer bytes could be 24
bbolt/db.go:1280:14: struct of size 104 could be 80
bbolt/freelist.go:24:15: struct with 136 pointer bytes could be 112
bbolt/node.go:12:11: struct with 88 pointer bytes could be 72
bbolt/tx.go:27:9: struct with 192 pointer bytes could be 88

Signed-off-by: Manuel Rüger <[email protected]>
  • Loading branch information
mrueg committed Jan 10, 2024
1 parent 4a059b4 commit fef18ee
Show file tree
Hide file tree
Showing 4 changed files with 108 additions and 102 deletions.
185 changes: 95 additions & 90 deletions db.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,12 +36,81 @@ const (
// All data access is performed through transactions which can be obtained through the DB.
// All the functions on DB will return ErrDatabaseNotOpen if accessed before Open() is called.
type DB struct {
pagePool sync.Pool

logger Logger

openFile func(string, int, os.FileMode) (*os.File, error)
file *os.File
data *[maxMapSize]byte
meta0 *common.Meta
meta1 *common.Meta
rwtx *Tx

freelist *freelist
batch *batch

ops struct {
writeAt func(b []byte, off int64) (n int, err error)
}

// FreelistType sets the backend freelist type. There are two options. Array is simple, but its
// performance degrades dramatically when the database is large and the freelist is heavily fragmented.
// Hashmap is faster in almost all circumstances, but it doesn't guarantee that it offers
// the smallest page id available. In normal cases it is safe.
// The default type is array.
FreelistType FreelistType

path string
// `dataref` isn't used at all on Windows, and the golangci-lint
// always fails on Windows platform.
//nolint
dataref []byte // mmap'ed readonly, write throws SEGV
txs []*Tx

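// Put `stats` at the first field to ensure it's 64-bit aligned. Note that
// the first word in an allocated struct can be relied upon to be 64-bit
// aligned. Refer to https://pkg.go.dev/sync/atomic#pkg-note-BUG. Also
// refer to discussion in https://github.com/etcd-io/bbolt/issues/577.
// NOTE(review): in the reordered layout shown in this diff, `stats` is no
// longer the first field of DB (it now follows `txs`), so the first-word
// alignment guarantee described above no longer applies. On 32-bit platforms
// the 64-bit alignment of `stats` would then depend on the sizes of the
// preceding fields; confirm this, or move `stats` back to the top.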
stats Stats

// If you want to read the entire database fast, you can set MmapFlags to
// syscall.MAP_POPULATE on Linux 2.6.23+ for sequential read-ahead.
MmapFlags int

// MaxBatchSize is the maximum size of a batch. Default value is
// copied from DefaultMaxBatchSize in Open.
//
// If <=0, disables batching.
//
// Do not change concurrently with calls to Batch.
MaxBatchSize int

// MaxBatchDelay is the maximum delay before a batch starts.
// Default value is copied from DefaultMaxBatchDelay in Open.
//
// If <=0, effectively disables batching.
//
// Do not change concurrently with calls to Batch.
MaxBatchDelay time.Duration

// AllocSize is the amount of space allocated when the database
// needs to create new pages. This is done to amortize the cost
// of truncate() and fsync() when growing the data file.
AllocSize int

datasz int
pageSize int
mmaplock sync.RWMutex // Protects mmap access during remapping.
statlock sync.RWMutex // Protects stats access.

freelistLoad sync.Once

batchMu sync.Mutex

rwlock sync.Mutex // Allows only one writer at a time.
metalock sync.Mutex // Protects meta page access.

// When enabled, the database will perform a Check() after every commit.
// A panic is issued if the database is in an inconsistent state. This
// flag has a large performance impact so it should only be used for
Expand All @@ -65,13 +134,6 @@ type DB struct {
// re-sync during recovery.
NoFreelistSync bool

// FreelistType sets the backend freelist type. There are two options. Array is simple, but its
// performance degrades dramatically when the database is large and the freelist is heavily fragmented.
// Hashmap is faster in almost all circumstances, but it doesn't guarantee that it offers
// the smallest page id available. In normal cases it is safe.
// The default type is array.
FreelistType FreelistType

// When true, skips the truncate call when growing the database.
// Setting this to true is only safe on non-ext3/ext4 systems.
// Skipping truncation avoids preallocation of hard drive space and
Expand All @@ -85,71 +147,13 @@ type DB struct {
// set to `true`.
PreLoadFreelist bool

// If you want to read the entire database fast, you can set MmapFlags to
// syscall.MAP_POPULATE on Linux 2.6.23+ for sequential read-ahead.
MmapFlags int

// MaxBatchSize is the maximum size of a batch. Default value is
// copied from DefaultMaxBatchSize in Open.
//
// If <=0, disables batching.
//
// Do not change concurrently with calls to Batch.
MaxBatchSize int

// MaxBatchDelay is the maximum delay before a batch starts.
// Default value is copied from DefaultMaxBatchDelay in Open.
//
// If <=0, effectively disables batching.
//
// Do not change concurrently with calls to Batch.
MaxBatchDelay time.Duration

// AllocSize is the amount of space allocated when the database
// needs to create new pages. This is done to amortize the cost
// of truncate() and fsync() when growing the data file.
AllocSize int

// Mlock locks database file in memory when set to true.
// It prevents major page faults, however used memory can't be reclaimed.
//
// Supported only on Unix via mlock/munlock syscalls.
Mlock bool

logger Logger

path string
openFile func(string, int, os.FileMode) (*os.File, error)
file *os.File
// `dataref` isn't used at all on Windows, and the golangci-lint
// always fails on Windows platform.
//nolint
dataref []byte // mmap'ed readonly, write throws SEGV
data *[maxMapSize]byte
datasz int
meta0 *common.Meta
meta1 *common.Meta
pageSize int
opened bool
rwtx *Tx
txs []*Tx

freelist *freelist
freelistLoad sync.Once

pagePool sync.Pool

batchMu sync.Mutex
batch *batch

rwlock sync.Mutex // Allows only one writer at a time.
metalock sync.Mutex // Protects meta page access.
mmaplock sync.RWMutex // Protects mmap access during remapping.
statlock sync.RWMutex // Protects stats access.

ops struct {
writeAt func(b []byte, off int64) (n int, err error)
}
opened bool

// Read only mode.
// When true, Update() and Begin(true) return ErrDatabaseReadOnly immediately.
Expand Down Expand Up @@ -1012,8 +1016,8 @@ type call struct {
type batch struct {
db *DB
timer *time.Timer
start sync.Once
calls []call
start sync.Once
}

// trigger runs the batch if it hasn't already been run.
Expand Down Expand Up @@ -1278,21 +1282,13 @@ func (db *DB) freepages() []common.Pgid {

// Options represents the options that can be set when opening a database.
type Options struct {
// Timeout is the amount of time to wait to obtain a file lock.
// When set to zero it will wait indefinitely.
Timeout time.Duration

// Sets the DB.NoGrowSync flag before memory mapping the file.
NoGrowSync bool

// Do not sync freelist to disk. This improves the database write performance
// under normal operation, but requires a full database re-sync during recovery.
NoFreelistSync bool
// Logger is the logger used for bbolt.
Logger Logger

// PreLoadFreelist sets whether to load the free pages when opening
// the db file. Note when opening db in write mode, bbolt will always
// load the free pages.
PreLoadFreelist bool
// OpenFile is used to open files. It defaults to os.OpenFile. This option
// is useful for writing hermetic tests.
OpenFile func(string, int, os.FileMode) (*os.File, error)

// FreelistType sets the backend freelist type. There are two options. Array which is simple but endures
// dramatic performance degradation if database is large and fragmentation in freelist is common.
Expand All @@ -1301,9 +1297,9 @@ type Options struct {
// The default type is array
FreelistType FreelistType

// Open database in read-only mode. Uses flock(..., LOCK_SH |LOCK_NB) to
// grab a shared lock (UNIX).
ReadOnly bool
// Timeout is the amount of time to wait to obtain a file lock.
// When set to zero it will wait indefinitely.
Timeout time.Duration

// Sets the DB.MmapFlags flag before memory mapping the file.
MmapFlags int
Expand All @@ -1321,22 +1317,31 @@ type Options struct {
// PageSize overrides the default OS page size.
PageSize int

// Sets the DB.NoGrowSync flag before memory mapping the file.
NoGrowSync bool

// Do not sync freelist to disk. This improves the database write performance
// under normal operation, but requires a full database re-sync during recovery.
NoFreelistSync bool

// PreLoadFreelist sets whether to load the free pages when opening
// the db file. Note when opening db in write mode, bbolt will always
// load the free pages.
PreLoadFreelist bool

// Open database in read-only mode. Uses flock(..., LOCK_SH |LOCK_NB) to
// grab a shared lock (UNIX).
ReadOnly bool

// NoSync sets the initial value of DB.NoSync. Normally this can just be
// set directly on the DB itself when returned from Open(), but this option
// is useful in APIs which expose Options but not the underlying DB.
NoSync bool

// OpenFile is used to open files. It defaults to os.OpenFile. This option
// is useful for writing hermetic tests.
OpenFile func(string, int, os.FileMode) (*os.File, error)

// Mlock locks database file in memory when set to true.
// It prevents potential page faults, however
// used memory can't be reclaimed. (UNIX only)
Mlock bool

// Logger is the logger used for bbolt.
Logger Logger
}

func (o *Options) String() string {
Expand Down
6 changes: 3 additions & 3 deletions freelist.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,20 +22,20 @@ type pidSet map[common.Pgid]struct{}
// freelist represents a list of all pages that are available for allocation.
// It also tracks pages that have been freed but are still in use by open transactions.
type freelist struct {
freelistType FreelistType // freelist type
ids []common.Pgid // all free and available free page ids.
allocs map[common.Pgid]common.Txid // mapping of Txid that allocated a pgid.
pending map[common.Txid]*txPending // mapping of soon-to-be free page ids by tx.
cache map[common.Pgid]struct{} // fast lookup of all free and pending page ids.
freemaps map[uint64]pidSet // key is the size of continuous pages(span), value is a set which contains the starting pgids of same size
forwardMap map[common.Pgid]uint64 // key is start pgid, value is its span size
backwardMap map[common.Pgid]uint64 // key is end pgid, value is its span size
freePagesCount uint64 // count of free pages(hashmap version)
allocate func(txid common.Txid, n int) common.Pgid // the freelist allocate func
free_count func() int // the function which gives you free page number
mergeSpans func(ids common.Pgids) // the mergeSpan func
getFreePageIDs func() []common.Pgid // get free pgids func
readIDs func(pgids []common.Pgid) // readIDs func reads list of pages and init the freelist
freelistType FreelistType // freelist type
ids []common.Pgid // all free and available free page ids.
freePagesCount uint64 // count of free pages(hashmap version)
}

// newFreelist returns an empty, initialized freelist.
Expand Down
10 changes: 5 additions & 5 deletions node.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,14 @@ import (
// node represents an in-memory, deserialized page.
type node struct {
bucket *Bucket
isLeaf bool
unbalanced bool
spilled bool
key []byte
pgid common.Pgid
parent *node
key []byte
children nodes
inodes common.Inodes
pgid common.Pgid
isLeaf bool
unbalanced bool
spilled bool
}

// root returns the top-level node this node is attached to.
Expand Down
9 changes: 5 additions & 4 deletions tx.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,22 +25,23 @@ import (
// are using them. A long running read transaction can cause the database to
// quickly grow.
type Tx struct {
writable bool
managed bool
db *DB
meta *common.Meta
root Bucket
pages map[common.Pgid]*common.Page
stats TxStats
root Bucket
commitHandlers []func()

stats TxStats

// WriteFlag specifies the flag for write-related methods like WriteTo().
// Tx opens the database file with the specified flag to copy the data.
//
// By default, the flag is unset, which works well for mostly in-memory
// workloads. For databases that are much larger than available RAM,
// set the flag to syscall.O_DIRECT to avoid trashing the page cache.
WriteFlag int
writable bool
managed bool
}

// init initializes the transaction.
Expand Down

0 comments on commit fef18ee

Please sign in to comment.