From 15e17bf899f766b87a6284c75cc295120d426416 Mon Sep 17 00:00:00 2001 From: Stephen Buttolph Date: Sun, 31 Mar 2024 11:03:15 -0400 Subject: [PATCH 01/14] Cleanup calculateNodeIDs --- x/merkledb/db.go | 18 +++--- x/merkledb/history.go | 2 +- x/merkledb/trie_test.go | 16 +++--- x/merkledb/view.go | 123 +++++++++++++++++++++++----------------- 4 files changed, 90 insertions(+), 69 deletions(-) diff --git a/x/merkledb/db.go b/x/merkledb/db.go index b3251878216..3b3d5515b59 100644 --- a/x/merkledb/db.go +++ b/x/merkledb/db.go @@ -216,9 +216,9 @@ type merkleDB struct { // Valid children of this trie. childViews []*view - // calculateNodeIDsSema controls the number of goroutines inside - // [calculateNodeIDsHelper] at any given time. - calculateNodeIDsSema *semaphore.Weighted + // hashNodesSema controls the number of goroutines that are created inside + // [hashChangedNode] at any given time. + hashNodesSema *semaphore.Weighted tokenSize int } @@ -270,12 +270,12 @@ func newDatabase( bufferPool, metrics, int(config.ValueNodeCacheSize)), - history: newTrieHistory(int(config.HistoryLength)), - debugTracer: getTracerIfEnabled(config.TraceLevel, DebugTrace, config.Tracer), - infoTracer: getTracerIfEnabled(config.TraceLevel, InfoTrace, config.Tracer), - childViews: make([]*view, 0, defaultPreallocationSize), - calculateNodeIDsSema: semaphore.NewWeighted(int64(rootGenConcurrency)), - tokenSize: BranchFactorToTokenSize[config.BranchFactor], + history: newTrieHistory(int(config.HistoryLength)), + debugTracer: getTracerIfEnabled(config.TraceLevel, DebugTrace, config.Tracer), + infoTracer: getTracerIfEnabled(config.TraceLevel, InfoTrace, config.Tracer), + childViews: make([]*view, 0, defaultPreallocationSize), + hashNodesSema: semaphore.NewWeighted(int64(rootGenConcurrency)), + tokenSize: BranchFactorToTokenSize[config.BranchFactor], } if err := trieDB.initializeRoot(); err != nil { diff --git a/x/merkledb/history.go b/x/merkledb/history.go index 22d87cd1cb4..bd41d29268e 100644 --- a/x/merkledb/history.go +++ b/x/merkledb/history.go @@ -57,7 +57,7 @@ type changeSummary struct { // The ID of the trie after these changes. rootID ids.ID // The root before/after this change. - // Set in [calculateNodeIDs]. + // Set in [applyValueChanges]. rootChange change[maybe.Maybe[*node]] nodes map[Key]*change[*node] values map[Key]*change[maybe.Maybe[[]byte]] diff --git a/x/merkledb/trie_test.go b/x/merkledb/trie_test.go index bab5880f4b6..c2495e5ebc6 100644 --- a/x/merkledb/trie_test.go +++ b/x/merkledb/trie_test.go @@ -21,7 +21,7 @@ import ( func getNodeValue(t Trie, key string) ([]byte, error) { path := ToKey([]byte(key)) if asView, ok := t.(*view); ok { - if err := asView.calculateNodeIDs(context.Background()); err != nil { + if err := asView.applyValueChanges(context.Background()); err != nil { return nil, err } } @@ -131,7 +131,7 @@ func TestVisitPathToKey(t *testing.T) { require.NoError(err) require.IsType(&view{}, trieIntf) trie = trieIntf.(*view) - require.NoError(trie.calculateNodeIDs(context.Background())) + require.NoError(trie.applyValueChanges(context.Background())) nodePath = make([]*node, 0, 1) require.NoError(visitPathToKey(trie, ToKey(key1), func(n *node) error { @@ -156,7 +156,7 @@ func TestVisitPathToKey(t *testing.T) { require.NoError(err) require.IsType(&view{}, trieIntf) trie = trieIntf.(*view) - require.NoError(trie.calculateNodeIDs(context.Background())) + require.NoError(trie.applyValueChanges(context.Background())) nodePath = make([]*node, 0, 2) require.NoError(visitPathToKey(trie, ToKey(key2), func(n *node) error { @@ -185,7 +185,7 @@ func TestVisitPathToKey(t *testing.T) { require.NoError(err) require.IsType(&view{}, trieIntf) trie = trieIntf.(*view) - require.NoError(trie.calculateNodeIDs(context.Background())) + require.NoError(trie.applyValueChanges(context.Background())) // Trie is: // [] @@ -775,7 +775,7 @@ func Test_Trie_ChainDeletion(t *testing.T) { ) require.NoError(err) - require.NoError(newTrie.(*view).calculateNodeIDs(context.Background())) + require.NoError(newTrie.(*view).applyValueChanges(context.Background())) maybeRoot := newTrie.getRoot() require.NoError(err) require.True(maybeRoot.HasValue()) @@ -794,7 +794,7 @@ func Test_Trie_ChainDeletion(t *testing.T) { }, ) require.NoError(err) - require.NoError(newTrie.(*view).calculateNodeIDs(context.Background())) + require.NoError(newTrie.(*view).applyValueChanges(context.Background())) // trie should be empty root := newTrie.getRoot() @@ -861,7 +861,7 @@ func Test_Trie_NodeCollapse(t *testing.T) { ) require.NoError(err) - require.NoError(trie.(*view).calculateNodeIDs(context.Background())) + require.NoError(trie.(*view).applyValueChanges(context.Background())) for _, kv := range kvs { node, err := trie.getEditableNode(ToKey(kv.Key), true) @@ -888,7 +888,7 @@ func Test_Trie_NodeCollapse(t *testing.T) { ) require.NoError(err) - require.NoError(trie.(*view).calculateNodeIDs(context.Background())) + require.NoError(trie.(*view).applyValueChanges(context.Background())) for _, kv := range deletedKVs { _, err := trie.getEditableNode(ToKey(kv.Key), true) diff --git a/x/merkledb/view.go b/x/merkledb/view.go index dd564afefdd..d7565571075 100644 --- a/x/merkledb/view.go +++ b/x/merkledb/view.go @@ -45,11 +45,12 @@ type view struct { committed bool commitLock sync.RWMutex - // tracking bool to enforce that no changes are made to the trie after the nodes have been calculated - nodesAlreadyCalculated utils.Atomic[bool] + // tracking bool to enforce that no changes are made to the trie after the + // nodes have been calculated + valueChangesApplied utils.Atomic[bool] - // calculateNodesOnce is a once to ensure that node calculation only occurs a single time - calculateNodesOnce sync.Once + // applyValueChangesOnce is a once to ensure that node calculation only occurs a single time + applyValueChangesOnce sync.Once // Controls the view's validity related fields. // Must be held while reading/writing [childViews], [invalidated], and [parentTrie]. @@ -117,7 +118,7 @@ func (v *view) NewView( return v.getParentTrie().NewView(ctx, changes) } - if err := v.calculateNodeIDs(ctx); err != nil { + if err := v.applyValueChanges(ctx); err != nil { return nil, err } @@ -198,8 +199,8 @@ func newViewWithChanges( } // since this is a set of historical changes, all nodes have already been calculated // since no new changes have occurred, no new calculations need to be done - v.calculateNodesOnce.Do(func() {}) - v.nodesAlreadyCalculated.Set(true) + v.applyValueChangesOnce.Do(func() {}) + v.valueChangesApplied.Set(true) return v, nil } @@ -211,45 +212,32 @@ func (v *view) getRoot() maybe.Maybe[*node] { return v.root } -// Recalculates the node IDs for all changed nodes in the trie. -// Cancelling [ctx] doesn't cancel calculation. It's used only for tracing. -func (v *view) calculateNodeIDs(ctx context.Context) error { +// applyValueChanges generates the node changes from the value changes. It then +// hashes the changed nodes to calculate the new trie. +// +// Cancelling [ctx] doesn't cancel the operation. It's used only for tracing. +func (v *view) applyValueChanges(ctx context.Context) error { var err error - v.calculateNodesOnce.Do(func() { + v.applyValueChangesOnce.Do(func() { + // Create the span inside the once wrapper to make traces more useful. + // Otherwise, spans would be created during calls where the IDs are not + // re-calculated. + ctx, span := v.db.infoTracer.Start(ctx, "MerkleDB.view.applyValueChanges") + defer span.End() + if v.isInvalid() { err = ErrInvalid return } - defer v.nodesAlreadyCalculated.Set(true) + defer v.valueChangesApplied.Set(true) oldRoot := maybe.Bind(v.root, (*node).clone) - // We wait to create the span until after checking that we need to actually - // calculateNodeIDs to make traces more useful (otherwise there may be a span - // per key modified even though IDs are not re-calculated). - _, span := v.db.infoTracer.Start(ctx, "MerkleDB.view.calculateNodeIDs") - defer span.End() - - // add all the changed key/values to the nodes of the trie - for key, change := range v.changes.values { - if change.after.IsNothing() { - // Note we're setting [err] defined outside this function. - if err = v.remove(key); err != nil { - return - } - // Note we're setting [err] defined outside this function. - } else if _, err = v.insert(key, change.after); err != nil { - return - } - } - - if !v.root.IsNothing() { - _ = v.db.calculateNodeIDsSema.Acquire(context.Background(), 1) - v.changes.rootID = v.calculateNodeIDsHelper(v.root.Value()) - v.db.calculateNodeIDsSema.Release(1) - } else { - v.changes.rootID = ids.Empty + // Note we're setting [err] defined outside this function. + if err = v.calculateNodeChanges(ctx); err != nil { + return } + v.hashChangedNodes(ctx) v.changes.rootChange = change[maybe.Maybe[*node]]{ before: oldRoot, @@ -265,9 +253,42 @@ func (v *view) calculateNodeIDs(ctx context.Context) error { return err } +func (v *view) calculateNodeChanges(ctx context.Context) error { + _, span := v.db.infoTracer.Start(ctx, "MerkleDB.view.calculateNodeChanges") + defer span.End() + + // Add all the changed key/values to the nodes of the trie + for key, change := range v.changes.values { + if change.after.IsNothing() { + if err := v.remove(key); err != nil { + return err + } + } else if _, err := v.insert(key, change.after); err != nil { + return err + } + } + + return nil +} + +func (v *view) hashChangedNodes(ctx context.Context) { + _, span := v.db.infoTracer.Start(ctx, "MerkleDB.view.hashChangedNodes") + defer span.End() + + if v.root.IsNothing() { + v.changes.rootID = ids.Empty + return + } + + _ = v.db.hashNodesSema.Acquire(context.Background(), 1) + defer v.db.hashNodesSema.Release(1) + + v.changes.rootID = v.hashChangedNode(v.root.Value()) +} + // Calculates the ID of all descendants of [n] which need to be recalculated, // and then calculates the ID of [n] itself. -func (v *view) calculateNodeIDsHelper(n *node) ids.ID { +func (v *view) hashChangedNode(n *node) ids.ID { // We use [wg] to wait until all descendants of [n] have been updated. var wg sync.WaitGroup @@ -282,16 +303,16 @@ func (v *view) calculateNodeIDsHelper(n *node) ids.ID { childEntry.hasValue = childNodeChange.after.hasValue() // Try updating the child and its descendants in a goroutine. - if ok := v.db.calculateNodeIDsSema.TryAcquire(1); ok { + if ok := v.db.hashNodesSema.TryAcquire(1); ok { wg.Add(1) go func() { - childEntry.id = v.calculateNodeIDsHelper(childNodeChange.after) - v.db.calculateNodeIDsSema.Release(1) + childEntry.id = v.hashChangedNode(childNodeChange.after) + v.db.hashNodesSema.Release(1) wg.Done() }() } else { // We're at the goroutine limit; do the work in this goroutine. - childEntry.id = v.calculateNodeIDsHelper(childNodeChange.after) + childEntry.id = v.hashChangedNode(childNodeChange.after) } } @@ -307,7 +328,7 @@ func (v *view) GetProof(ctx context.Context, key []byte) (*Proof, error) { _, span := v.db.infoTracer.Start(ctx, "MerkleDB.view.GetProof") defer span.End() - if err := v.calculateNodeIDs(ctx); err != nil { + if err := v.applyValueChanges(ctx); err != nil { return nil, err } @@ -333,7 +354,7 @@ func (v *view) GetRangeProof( _, span := v.db.infoTracer.Start(ctx, "MerkleDB.view.GetRangeProof") defer span.End() - if err := v.calculateNodeIDs(ctx); err != nil { + if err := v.applyValueChanges(ctx); err != nil { return nil, err } result, err := getRangeProof(v, start, end, maxLength) @@ -371,7 +392,7 @@ func (v *view) commitToDB(ctx context.Context) error { // Call this here instead of in [v.db.commitChanges] // because doing so there would be a deadlock. - if err := v.calculateNodeIDs(ctx); err != nil { + if err := v.applyValueChanges(ctx); err != nil { return err } @@ -417,7 +438,7 @@ func (v *view) updateParent(newParent View) { // GetMerkleRoot returns the ID of the root of this view. func (v *view) GetMerkleRoot(ctx context.Context) (ids.ID, error) { - if err := v.calculateNodeIDs(ctx); err != nil { + if err := v.applyValueChanges(ctx); err != nil { return ids.Empty, err } return v.changes.rootID, nil @@ -487,7 +508,7 @@ func (v *view) getValue(key Key) ([]byte, error) { // Must not be called after [calculateNodeIDs] has returned. func (v *view) remove(key Key) error { - if v.nodesAlreadyCalculated.Get() { + if v.valueChangesApplied.Get() { return ErrNodesAlreadyCalculated } @@ -553,7 +574,7 @@ func (v *view) remove(key Key) error { // * [n] has children. // Must not be called after [calculateNodeIDs] has returned. func (v *view) compressNodePath(parent, n *node) error { - if v.nodesAlreadyCalculated.Get() { + if v.valueChangesApplied.Get() { return ErrNodesAlreadyCalculated } @@ -624,7 +645,7 @@ func (v *view) insert( key Key, value maybe.Maybe[[]byte], ) (*node, error) { - if v.nodesAlreadyCalculated.Get() { + if v.valueChangesApplied.Get() { return nil, ErrNodesAlreadyCalculated } @@ -775,7 +796,7 @@ func (v *view) recordNodeDeleted(after *node, hadValue bool) error { // If it is an existing node, record what its value was before it was changed. // Must not be called after [calculateNodeIDs] has returned. func (v *view) recordKeyChange(key Key, after *node, hadValue bool, newNode bool) error { - if v.nodesAlreadyCalculated.Get() { + if v.valueChangesApplied.Get() { return ErrNodesAlreadyCalculated } @@ -807,7 +828,7 @@ func (v *view) recordKeyChange(key Key, after *node, hadValue bool, newNode bool // That's deferred until we call [calculateNodeIDs]. // Must not be called after [calculateNodeIDs] has returned. func (v *view) recordValueChange(key Key, value maybe.Maybe[[]byte]) error { - if v.nodesAlreadyCalculated.Get() { + if v.valueChangesApplied.Get() { return ErrNodesAlreadyCalculated } From 4ea7ef02e3ce7e46d0c087d1a1624b9753df9344 Mon Sep 17 00:00:00 2001 From: Stephen Buttolph Date: Sun, 31 Mar 2024 11:11:34 -0400 Subject: [PATCH 02/14] nit --- x/merkledb/view.go | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/x/merkledb/view.go b/x/merkledb/view.go index d7565571075..958ec33fb0b 100644 --- a/x/merkledb/view.go +++ b/x/merkledb/view.go @@ -45,11 +45,12 @@ type view struct { committed bool commitLock sync.RWMutex - // tracking bool to enforce that no changes are made to the trie after the - // nodes have been calculated + // valueChangesApplied is used to enforce that no changes are made to the + // trie after the nodes have been calculated valueChangesApplied utils.Atomic[bool] - // applyValueChangesOnce is a once to ensure that node calculation only occurs a single time + // applyValueChangesOnce prevents node calculation from occuring multiple + // times applyValueChangesOnce sync.Once // Controls the view's validity related fields. From 3c786101f4904848acbdbce78aeda493961f4884 Mon Sep 17 00:00:00 2001 From: Stephen Buttolph Date: Sun, 31 Mar 2024 11:16:38 -0400 Subject: [PATCH 03/14] typo --- x/merkledb/view.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x/merkledb/view.go b/x/merkledb/view.go index 958ec33fb0b..41874de4fc4 100644 --- a/x/merkledb/view.go +++ b/x/merkledb/view.go @@ -49,7 +49,7 @@ type view struct { // trie after the nodes have been calculated valueChangesApplied utils.Atomic[bool] - // applyValueChangesOnce prevents node calculation from occuring multiple + // applyValueChangesOnce prevents node calculation from occurring multiple // times applyValueChangesOnce sync.Once From eaa64a0a7c746fbd08a9562f8033433c8dd2a5d5 Mon Sep 17 00:00:00 2001 From: Stephen Buttolph Date: Sun, 31 Mar 2024 13:06:13 -0400 Subject: [PATCH 04/14] Add initial hashing benchmark --- x/merkledb/view_test.go | 73 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) create mode 100644 x/merkledb/view_test.go diff --git a/x/merkledb/view_test.go b/x/merkledb/view_test.go new file mode 100644 index 00000000000..6d605af77d9 --- /dev/null +++ b/x/merkledb/view_test.go @@ -0,0 +1,73 @@ +// Copyright (C) 2019-2024, Ava Labs, Inc. All rights reserved. +// See the file LICENSE for licensing terms. + +package merkledb + +import ( + "context" + "encoding/binary" + "testing" + + "github.com/stretchr/testify/require" + + "github.com/ava-labs/avalanchego/database" + "github.com/ava-labs/avalanchego/utils/hashing" +) + +func Benchmark_HashChangedNodes(b *testing.B) { + tests := []struct { + name string + numKeys uint64 + }{ + { + name: "1", + numKeys: 1, + }, + { + name: "10", + numKeys: 10, + }, + { + name: "100", + numKeys: 100, + }, + { + name: "1000", + numKeys: 1000, + }, + { + name: "10000", + numKeys: 10000, + }, + { + name: "100000", + numKeys: 100000, + }, + } + for _, test := range tests { + db, err := getBasicDB() + require.NoError(b, err) + + ops := make([]database.BatchOp, 0, test.numKeys) + for i := uint64(0); i < test.numKeys; i++ { + k := binary.AppendUvarint(nil, i) + ops = append(ops, database.BatchOp{ + Key: k, + Value: hashing.ComputeHash256(k), + }) + } + + ctx := context.Background() + viewIntf, err := db.NewView(ctx, ViewChanges{BatchOps: ops}) + require.NoError(b, err) + + view := viewIntf.(*view) + require.NoError(b, view.calculateNodeChanges(ctx)) + + b.Run(test.name, func(b *testing.B) { + for i := 0; i < b.N; i++ { + view.hashChangedNodes(ctx) + } + }) + } +} From 4dec1abab4813fa93445f1c4c1d2cf455c8dd92b Mon Sep 17 00:00:00 2001 From: Stephen Buttolph Date: Sun, 31 Mar 2024 18:36:16 -0400 Subject: [PATCH 05/14] add tests --- x/merkledb/view_test.go | 114 ++++++++++++++++++++++++---------------- 1 file changed, 69 insertions(+), 45 deletions(-) diff --git a/x/merkledb/view_test.go b/x/merkledb/view_test.go index 6d605af77d9..9a307491cc0 100644 --- a/x/merkledb/view_test.go +++ b/x/merkledb/view_test.go @@ -14,56 +14,80 @@ import ( "github.com/ava-labs/avalanchego/utils/hashing" ) -func Benchmark_HashChangedNodes(b *testing.B) { - tests := []struct { - name string - numKeys uint64 - }{ - { - name: "1", - numKeys: 1, - }, - { - name: "10", - numKeys: 10, - }, - { - name: "100", - numKeys: 100, - }, - { - name: "1000", - numKeys: 1000, - }, - { - name: "10000", - numKeys: 10000, - }, - { - name: "100000", - numKeys: 100000, - }, +var hashChangedNodesTests = []struct { + name string + numKeys uint64 + expectedRootHash string +}{ + { + name: "1", + numKeys: 1, + expectedRootHash: "2A4DRkSWbTvSxgA1UMGp1Mpt1yzMFaeMMiDnrijVGJXPcRYiD4", + }, + { + name: "10", + numKeys: 10, + expectedRootHash: "2PGy7QvbYwVwn5QmLgj4KBgV2BisanZE8Nue2SxK9ffybb4mAn", + }, + { + name: "100", + numKeys: 100, + expectedRootHash: "LCeS4DWh6TpNKWH4ke9a2piSiwwLbmxGUj8XuaWx1XDGeCMAv", + }, + { + name: "1000", + numKeys: 1000, + expectedRootHash: "2S6f84wdRHmnx51mj35DF2owzf8wio5pzNJXfEWfFYFNxUB64T", + }, + { + name: "10000", + numKeys: 10000, + expectedRootHash: "wF6UnhaDoA9fAqiXAcx27xCYBK2aspDBEXkicmC7rs8EzLCD8", + }, + { + name: "100000", + numKeys: 100000, + expectedRootHash: "2Dy3RWZeNDUnUvzXpruB5xdp1V7xxb14M53ywdZVACDkdM66M1", + }, +} + +func makeViewForHashChangedNodes(t require.TestingT, numKeys uint64) *view { + db, err := getBasicDB() + require.NoError(t, err) + + ops := make([]database.BatchOp, 0, numKeys) + for i := uint64(0); i < numKeys; i++ { + k := binary.AppendUvarint(nil, i) + ops = append(ops, database.BatchOp{ + Key: k, + Value: hashing.ComputeHash256(k), + }) } - for _, test := range tests { - db, err := getBasicDB() - require.NoError(b, err) - ops := make([]database.BatchOp, 0, test.numKeys) - for i := uint64(0); i < test.numKeys; i++ { - k := binary.AppendUvarint(nil, i) - ops = append(ops, database.BatchOp{ - Key: k, - Value: hashing.ComputeHash256(k), - }) - } + ctx := context.Background() + viewIntf, err := db.NewView(ctx, ViewChanges{BatchOps: ops}) + require.NoError(t, err) - ctx := context.Background() - viewIntf, err := db.NewView(ctx, ViewChanges{BatchOps: ops}) - require.NoError(b, err) + view := viewIntf.(*view) + require.NoError(t, view.calculateNodeChanges(ctx)) + return view +} - view := viewIntf.(*view) - require.NoError(b, view.calculateNodeChanges(ctx)) +func Test_HashChangedNodes(t *testing.T) { + for _, test := range hashChangedNodesTests { + t.Run(test.name, func(t *testing.T) { + view := makeViewForHashChangedNodes(t, test.numKeys) + ctx := context.Background() + view.hashChangedNodes(ctx) + require.Equal(t, test.expectedRootHash, view.changes.rootID.String()) + }) + } +} +func Benchmark_HashChangedNodes(b *testing.B) { + for _, test := range hashChangedNodesTests { + view := makeViewForHashChangedNodes(b, test.numKeys) + ctx := context.Background() b.Run(test.name, func(b *testing.B) { for i := 0; i < b.N; i++ { view.hashChangedNodes(ctx) From dfeb3fb0410cccffdee69a6baff8043535e7a2b7 Mon Sep 17 00:00:00 2001 From: Stephen Buttolph Date: Sun, 31 Mar 2024 18:42:32 -0400 Subject: [PATCH 06/14] nits --- x/merkledb/view.go | 5 ++--- x/merkledb/view_test.go | 16 ++++++++++++---- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/x/merkledb/view.go b/x/merkledb/view.go index 41874de4fc4..125c727d049 100644 --- a/x/merkledb/view.go +++ b/x/merkledb/view.go @@ -294,7 +294,6 @@ func (v *view) hashChangedNode(n *node) ids.ID { var wg sync.WaitGroup for childIndex, childEntry := range n.children { - childEntry := childEntry // New variable so goroutine doesn't capture loop variable. childKey := n.key.Extend(ToToken(childIndex, v.tokenSize), childEntry.compressedKey) childNodeChange, ok := v.changes.nodes[childKey] if !ok { @@ -306,11 +305,11 @@ func (v *view) hashChangedNode(n *node) ids.ID { // Try updating the child and its descendants in a goroutine. if ok := v.db.hashNodesSema.TryAcquire(1); ok { wg.Add(1) - go func() { + go func(childEntry *child) { childEntry.id = v.hashChangedNode(childNodeChange.after) v.db.hashNodesSema.Release(1) wg.Done() - }() + }(childEntry) } else { // We're at the goroutine limit; do the work in this goroutine. childEntry.id = v.hashChangedNode(childNodeChange.after) diff --git a/x/merkledb/view_test.go b/x/merkledb/view_test.go index 9a307491cc0..0bd348a25e3 100644 --- a/x/merkledb/view_test.go +++ b/x/merkledb/view_test.go @@ -11,6 +11,7 @@ import ( "github.com/stretchr/testify/require" "github.com/ava-labs/avalanchego/database" + "github.com/ava-labs/avalanchego/database/memdb" "github.com/ava-labs/avalanchego/utils/hashing" ) @@ -51,8 +52,15 @@ var hashChangedNodesTests = []struct { }, } -func makeViewForHashChangedNodes(t require.TestingT, numKeys uint64) *view { - db, err := getBasicDB() +func makeViewForHashChangedNodes(t require.TestingT, numKeys uint64, parallelism uint) *view { + config := newDefaultConfig() + config.RootGenConcurrency = parallelism + db, err := newDatabase( + context.Background(), + memdb.New(), + newDefaultConfig(), + &mockMetrics{}, + ) require.NoError(t, err) ops := make([]database.BatchOp, 0, numKeys) @@ -76,7 +84,7 @@ func makeViewForHashChangedNodes(t require.TestingT, numKeys uint64) *view { func Test_HashChangedNodes(t *testing.T) { for _, test := range hashChangedNodesTests { t.Run(test.name, func(t *testing.T) { - view := makeViewForHashChangedNodes(t, test.numKeys) + view := makeViewForHashChangedNodes(t, test.numKeys, 16) ctx := context.Background() view.hashChangedNodes(ctx) require.Equal(t, test.expectedRootHash, view.changes.rootID.String()) @@ -86,7 +94,7 @@ func Test_HashChangedNodes(t *testing.T) { func Benchmark_HashChangedNodes(b *testing.B) { for _, test := range hashChangedNodesTests { - view := makeViewForHashChangedNodes(b, test.numKeys) + view := makeViewForHashChangedNodes(b, test.numKeys, 1) ctx := context.Background() b.Run(test.name, func(b *testing.B) { for i := 0; i < b.N; i++ { From 2cf95e5cfa3529e73b369bc2b101f3f1ea237198 Mon Sep 17 00:00:00 2001 From: Stephen Buttolph Date: Sun, 31 Mar 2024 19:02:39 -0400 Subject: [PATCH 07/14] Remove allocations for keys --- x/merkledb/view.go | 54 +++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 51 insertions(+), 3 deletions(-) diff --git a/x/merkledb/view.go b/x/merkledb/view.go index 125c727d049..80c375eca4f 100644 --- a/x/merkledb/view.go +++ b/x/merkledb/view.go @@ -290,11 +290,59 @@ func (v *view) hashChangedNodes(ctx context.Context) { // Calculates the ID of all descendants of [n] which need to be recalculated, // and then calculates the ID of [n] itself. func (v *view) hashChangedNode(n *node) ids.ID { - // We use [wg] to wait until all descendants of [n] have been updated. - var wg sync.WaitGroup + if len(n.children) == 0 { + return n.calculateID(v.db.metrics) + } + + // Calculate the size of the largest child key of this node. This allows + // only allocating a single slice for all of the keys. + var maxBitLength int + for _, childEntry := range n.children { + totalBitLength := n.key.length + v.tokenSize + childEntry.compressedKey.length + maxBitLength = max(maxBitLength, totalBitLength) + } + + var ( + maxBytesNeeded = bytesNeeded(maxBitLength) + // keyBuffer is allocated onto the heap because it is dynamically sized. + keyBuffer = make([]byte, maxBytesNeeded) + // childBuffer is allocated on the stack. + childBuffer = make([]byte, 1) + dualIndex = dualBitIndex(v.tokenSize) + bytesForKey = bytesNeeded(n.key.length) + lastKeyByte byte + + // We use [wg] to wait until all descendants of [n] have been updated. + wg sync.WaitGroup + ) + + if bytesForKey > 0 { + // We can just copy this node's key once. It doesn't change as we + // iterate over the children. + copy(keyBuffer, n.key.value) + lastKeyByte = keyBuffer[bytesForKey-1] + } for childIndex, childEntry := range n.children { - childKey := n.key.Extend(ToToken(childIndex, v.tokenSize), childEntry.compressedKey) + childBuffer[0] = childIndex << dualIndex + childByteKey := Key{ + value: byteSliceToString(childBuffer), + length: v.tokenSize, + } + + totalBitLength := n.key.length + v.tokenSize + childEntry.compressedKey.length + buffer := keyBuffer[:bytesNeeded(totalBitLength)] + // Make sure the last byte of the key is orginally set correctly + if bytesForKey > 0 { + keyBuffer[bytesForKey-1] = lastKeyByte + } + extendIntoBuffer(buffer, childByteKey, n.key.length) + extendIntoBuffer(buffer, childEntry.compressedKey, n.key.length+v.tokenSize) + childKey := Key{ + value: byteSliceToString(buffer), + length: totalBitLength, + } + childNodeChange, ok := v.changes.nodes[childKey] if !ok { // This child wasn't changed. From 7d6ac6c06f410b59520c6ce4c654c3ab6151e442 Mon Sep 17 00:00:00 2001 From: Stephen Buttolph Date: Sun, 31 Mar 2024 19:03:25 -0400 Subject: [PATCH 08/14] typo --- x/merkledb/view.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x/merkledb/view.go b/x/merkledb/view.go index 80c375eca4f..062558874a5 100644 --- a/x/merkledb/view.go +++ b/x/merkledb/view.go @@ -332,7 +332,7 @@ func (v *view) hashChangedNode(n *node) ids.ID { totalBitLength := n.key.length + v.tokenSize + childEntry.compressedKey.length buffer := keyBuffer[:bytesNeeded(totalBitLength)] - // Make sure the last byte of the key is orginally set correctly + // Make sure the last byte of the key is originally set correctly if bytesForKey > 0 { keyBuffer[bytesForKey-1] = lastKeyByte } From a018121e10034ff6eebdea5e6df2493f47bf323c Mon Sep 17 00:00:00 2001 From: Stephen Buttolph Date: Mon, 1 Apr 2024 11:35:47 -0400 Subject: [PATCH 09/14] comment --- x/merkledb/view.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/x/merkledb/view.go b/x/merkledb/view.go index 062558874a5..5ce7851cbfc 100644 --- a/x/merkledb/view.go +++ b/x/merkledb/view.go @@ -310,6 +310,9 @@ func (v *view) hashChangedNode(n *node) ids.ID { childBuffer = make([]byte, 1) dualIndex = dualBitIndex(v.tokenSize) bytesForKey = bytesNeeded(n.key.length) + // We track the last byte used by the key so that we can reset the + // value. for each key. This is needed because the child buffer may get + // ORed with this byte. lastKeyByte byte // We use [wg] to wait until all descendants of [n] have been updated. From 256cef18a6a09719a726883ba2f8873228ff3191 Mon Sep 17 00:00:00 2001 From: Stephen Buttolph Date: Mon, 1 Apr 2024 11:40:50 -0400 Subject: [PATCH 10/14] comment --- x/merkledb/view.go | 1 + 1 file changed, 1 insertion(+) diff --git a/x/merkledb/view.go b/x/merkledb/view.go index 5ce7851cbfc..f1c46cfdc81 100644 --- a/x/merkledb/view.go +++ b/x/merkledb/view.go @@ -290,6 +290,7 @@ func (v *view) hashChangedNodes(ctx context.Context) { // Calculates the ID of all descendants of [n] which need to be recalculated, // and then calculates the ID of [n] itself. func (v *view) hashChangedNode(n *node) ids.ID { + // If there are no children, we can avoid allocating [keyBuffer]. if len(n.children) == 0 { return n.calculateID(v.db.metrics) } From c4ffa4c920aabe12f3cd2ca3b9220063fe529678 Mon Sep 17 00:00:00 2001 From: Stephen Buttolph Date: Mon, 1 Apr 2024 12:51:23 -0400 Subject: [PATCH 11/14] fix concurrency --- x/merkledb/view_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x/merkledb/view_test.go b/x/merkledb/view_test.go index 0bd348a25e3..f321dffd511 100644 --- a/x/merkledb/view_test.go +++ b/x/merkledb/view_test.go @@ -58,7 +58,7 @@ func makeViewForHashChangedNodes(t require.TestingT, numKeys uint64, parallelism db, err := newDatabase( context.Background(), memdb.New(), - newDefaultConfig(), + config, &mockMetrics{}, ) require.NoError(t, err) From 8a880e4f43b4ae85c1469a7651557bcf0714f9e1 Mon Sep 17 00:00:00 2001 From: Stephen Buttolph Date: Mon, 1 Apr 2024 21:28:48 -0400 Subject: [PATCH 12/14] nit --- x/merkledb/view.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x/merkledb/view.go b/x/merkledb/view.go index f1c46cfdc81..77852246e83 100644 --- a/x/merkledb/view.go +++ b/x/merkledb/view.go @@ -338,7 +338,7 @@ func (v *view) hashChangedNode(n *node) ids.ID { buffer := keyBuffer[:bytesNeeded(totalBitLength)] // Make sure the last byte of the key is originally set correctly if bytesForKey > 0 { - keyBuffer[bytesForKey-1] = lastKeyByte + buffer[bytesForKey-1] = lastKeyByte } extendIntoBuffer(buffer, childByteKey, n.key.length) extendIntoBuffer(buffer, childEntry.compressedKey, n.key.length+v.tokenSize) From 0209dedc6bf21e2c56710dd2714336605fa621f5 Mon Sep 17 00:00:00 2001 From: Stephen Buttolph Date: Tue, 2 Apr 2024 15:04:08 -0400 Subject: [PATCH 13/14] Address nits --- x/merkledb/view.go | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/x/merkledb/view.go b/x/merkledb/view.go index 77852246e83..9480ef67f72 100644 --- a/x/merkledb/view.go +++ b/x/merkledb/view.go @@ -311,9 +311,9 @@ func (v *view) hashChangedNode(n *node) ids.ID { childBuffer = make([]byte, 1) dualIndex = dualBitIndex(v.tokenSize) bytesForKey = bytesNeeded(n.key.length) - // We track the last byte used by the key so that we can reset the - // value. for each key. This is needed because the child buffer may get - // ORed with this byte. + // We track the last byte of [n.key] so that we can reset the value for + // each key. This is needed because the child buffer may get ORed at + // this byte. lastKeyByte byte // We use [wg] to wait until all descendants of [n] have been updated. @@ -329,7 +329,9 @@ func (v *view) hashChangedNode(n *node) ids.ID { for childIndex, childEntry := range n.children { childBuffer[0] = childIndex << dualIndex - childByteKey := Key{ + childIndexAsKey := Key{ + // It is safe to use byteSliceToString because [childBuffer] is not + // modified while [childIndexAsKey] is in use. value: byteSliceToString(childBuffer), length: v.tokenSize, } @@ -340,9 +342,11 @@ func (v *view) hashChangedNode(n *node) ids.ID { if bytesForKey > 0 { buffer[bytesForKey-1] = lastKeyByte } - extendIntoBuffer(buffer, childByteKey, n.key.length) + extendIntoBuffer(buffer, childIndexAsKey, n.key.length) extendIntoBuffer(buffer, childEntry.compressedKey, n.key.length+v.tokenSize) childKey := Key{ + // It is safe to use byteSliceToString because [buffer] is not + // modified while [childKey] is in use. value: byteSliceToString(buffer), length: totalBitLength, } From 97109b3d78a5054b72026ea7fa5132a43b642b74 Mon Sep 17 00:00:00 2001 From: Stephen Buttolph Date: Tue, 2 Apr 2024 16:30:47 -0400 Subject: [PATCH 14/14] nit --- x/merkledb/view.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/x/merkledb/view.go b/x/merkledb/view.go index 3732720bb43..f47e772b685 100644 --- a/x/merkledb/view.go +++ b/x/merkledb/view.go @@ -320,8 +320,8 @@ func (v *view) hashChangedNode(n *node) ids.ID { ) if bytesForKey > 0 { - // We can just copy this node's key once. It doesn't change as we - // iterate over the children. + // We only need to copy this node's key once because it does not change + // as we iterate over the children. copy(keyBuffer, n.key.value) lastKeyByte = keyBuffer[bytesForKey-1] }