From 08b38c9e10808c4ea3fc753e0bf16794575e47ef Mon Sep 17 00:00:00 2001 From: dboehm-avalabs Date: Tue, 11 Apr 2023 10:42:43 -0400 Subject: [PATCH 01/11] Fix Concurrency Bug In CommitToParent --- x/merkledb/db.go | 4 +- x/merkledb/trie_test.go | 82 +++++++++++++++++++++++++++++++++++++++++ x/merkledb/trieview.go | 8 ++-- 3 files changed, 89 insertions(+), 5 deletions(-) diff --git a/x/merkledb/db.go b/x/merkledb/db.go index 88a63d23cac1..8ff406b7e48c 100644 --- a/x/merkledb/db.go +++ b/x/merkledb/db.go @@ -752,8 +752,8 @@ func (db *Database) commitChanges(ctx context.Context, trieToCommit *trieView) e // invalidate all child views except for the view being committed db.invalidateChildrenExcept(trieToCommit) - // move any child views of the committed trie onto the db - db.moveChildViewsToDB(trieToCommit) + // move any child views of the committed trie onto the db after changes have been made + defer db.moveChildViewsToDB(trieToCommit) if len(changes.nodes) == 0 { return nil diff --git a/x/merkledb/trie_test.go b/x/merkledb/trie_test.go index 27bdaa912c1d..af61462362b4 100644 --- a/x/merkledb/trie_test.go +++ b/x/merkledb/trie_test.go @@ -7,6 +7,7 @@ import ( "context" "math/rand" "strconv" + "sync" "testing" "github.com/stretchr/testify/require" @@ -1178,3 +1179,84 @@ func TestTrieViewInvalidChildrenExcept(t *testing.T) { require.True(view3.invalidated) require.Empty(view1.childViews) } + +func Test_Trie_CommitToParentView_Concurrent(t *testing.T) { + for i := 0; i < 5000; i++ { + dbTrie, err := getBasicDB() + require.NoError(t, err) + require.NotNil(t, dbTrie) + + baseView, err := dbTrie.NewView() + require.NoError(t, err) + + parentView, err := baseView.NewView() + require.NoError(t, err) + err = parentView.Insert(context.Background(), []byte{0}, []byte{0}) + require.NoError(t, err) + + childView, err := parentView.NewView() + require.NoError(t, err) + err = childView.Insert(context.Background(), []byte{1}, []byte{1}) + require.NoError(t, err) + + var wg sync.WaitGroup + wg.Add(2) + go func() { + defer wg.Done() + require.NoError(t, parentView.CommitToParent(context.Background())) + }() + go func() { + defer wg.Done() + go require.NoError(t, childView.CommitToParent(context.Background())) + }() + + wg.Wait() + + val0, err := baseView.GetValue(context.Background(), []byte{0}) + require.NoError(t, err) + require.Equal(t, []byte{0}, val0) + + val1, err := baseView.GetValue(context.Background(), []byte{1}) + require.NoError(t, err) + require.Equal(t, []byte{1}, val1) + } +} + +func Test_Trie_CommitToParentDB_Concurrent(t *testing.T) { + for i := 0; i < 5000; i++ { + dbTrie, err := getBasicDB() + require.NoError(t, err) + require.NotNil(t, dbTrie) + + parentView, err := dbTrie.NewView() + require.NoError(t, err) + err = parentView.Insert(context.Background(), []byte{0}, []byte{0}) + require.NoError(t, err) + + childView, err := parentView.NewView() + require.NoError(t, err) + err = childView.Insert(context.Background(), []byte{1}, []byte{1}) + require.NoError(t, err) + + var wg sync.WaitGroup + wg.Add(2) + go func() { + defer wg.Done() + require.NoError(t, parentView.CommitToParent(context.Background())) + }() + go func() { + defer wg.Done() + go require.NoError(t, childView.CommitToParent(context.Background())) + }() + + wg.Wait() + + val0, err := dbTrie.GetValue(context.Background(), []byte{0}) + require.NoError(t, err) + require.Equal(t, []byte{0}, val0) + + val1, err := dbTrie.GetValue(context.Background(), []byte{1}) + require.NoError(t, err) + require.Equal(t, []byte{1}, val1) + } +} diff --git a/x/merkledb/trieview.go b/x/merkledb/trieview.go index 76ff0f4ab43c..63f8a7903d0c 100644 --- a/x/merkledb/trieview.go +++ b/x/merkledb/trieview.go @@ -515,12 +515,14 @@ func (t *trieView) commitChanges(ctx context.Context, trieToCommit *trieView) er case trieToCommit == nil: // no changes to apply return nil - case trieToCommit.getParentTrie() != t: - // trieToCommit needs to be a child of t, otherwise the changes merge would not work - return ErrViewIsNotAChild case trieToCommit.isInvalid(): // don't apply changes from an invalid view return ErrInvalid + case t.committed: + return t.getParentTrie().commitChanges(ctx, trieToCommit) + case trieToCommit.getParentTrie() != t: + // trieToCommit needs to be a child of t, otherwise the changes merge would not work + return ErrViewIsNotAChild } // Invalidate all child views except the view being committed. From 685bc51c67cc211c1812b74bcfea6f02080fa154 Mon Sep 17 00:00:00 2001 From: dboehm-avalabs Date: Tue, 11 Apr 2023 10:58:55 -0400 Subject: [PATCH 02/11] Update trieview.go --- x/merkledb/trieview.go | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/x/merkledb/trieview.go b/x/merkledb/trieview.go index 63f8a7903d0c..86b166d0a4d7 100644 --- a/x/merkledb/trieview.go +++ b/x/merkledb/trieview.go @@ -222,10 +222,8 @@ func (t *trieView) calculateNodeIDs(ctx context.Context) error { // ensure that the view under this one is up-to-date before potentially pulling in nodes from it // getting the Merkle root forces any unupdated nodes to recalculate their ids - if t.parentTrie != nil { - if _, err := t.getParentTrie().GetMerkleRoot(ctx); err != nil { - return err - } + if _, err := t.getParentTrie().GetMerkleRoot(ctx); err != nil { + return err } if err := t.applyChangedValuesToTrie(ctx); err != nil { From a6312e310677b5f0d9128929f6b3a08e3ae20d62 Mon Sep 17 00:00:00 2001 From: dboehm-avalabs Date: Tue, 11 Apr 2023 12:24:32 -0400 Subject: [PATCH 03/11] fix --- x/merkledb/db.go | 2 +- x/merkledb/trieview.go | 5 +---- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/x/merkledb/db.go b/x/merkledb/db.go index 8ff406b7e48c..805b096d0f81 100644 --- a/x/merkledb/db.go +++ b/x/merkledb/db.go @@ -753,7 +753,7 @@ func (db *Database) commitChanges(ctx context.Context, trieToCommit *trieView) e db.invalidateChildrenExcept(trieToCommit) // move any child views of the committed trie onto the db after changes have been made - defer db.moveChildViewsToDB(trieToCommit) + db.moveChildViewsToDB(trieToCommit) if len(changes.nodes) == 0 { return nil diff --git a/x/merkledb/trieview.go b/x/merkledb/trieview.go index 1646218a87bb..5aa04cfc5eff 100644 --- a/x/merkledb/trieview.go +++ b/x/merkledb/trieview.go @@ -27,7 +27,7 @@ const defaultPreallocationSize = 100 var ( ErrCommitted = errors.New("view has been committed") - ErrInvalid = errors.New("the trie this view was based on has changed, rending this view invalid") + ErrInvalid = errors.New("the trie this view was based on has changed, rendering this view invalid") ErrOddLengthWithValue = errors.New( "the underlying db only supports whole number of byte keys, so cannot record changes with odd nibble length", ) @@ -601,9 +601,6 @@ func (t *trieView) commitToParent(ctx context.Context) error { if err := t.getParentTrie().commitChanges(ctx, t); err != nil { return err } - if t.isInvalid() { - return ErrInvalid - } t.committed = true From e98a42891c61fcc8645d6cc92dd957c78b14a363 Mon Sep 17 00:00:00 2001 From: David Boehm <91908103+dboehm-avalabs@users.noreply.github.com> Date: Tue, 11 Apr 2023 13:11:53 -0400 Subject: [PATCH 04/11] Update x/merkledb/db.go Co-authored-by: Dan Laine --- x/merkledb/db.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x/merkledb/db.go b/x/merkledb/db.go index 805b096d0f81..88a63d23cac1 100644 --- a/x/merkledb/db.go +++ b/x/merkledb/db.go @@ -752,7 +752,7 @@ func (db *Database) commitChanges(ctx context.Context, trieToCommit *trieView) e // invalidate all child views except for the view being committed db.invalidateChildrenExcept(trieToCommit) - // move any child views of the committed trie onto the db after changes have been made + // move any child views of the committed trie onto the db db.moveChildViewsToDB(trieToCommit) if len(changes.nodes) == 0 { From 5e1015da59760874308a67b00a23cf512ae36c87 Mon Sep 17 00:00:00 2001 From: dboehm-avalabs Date: Tue, 11 Apr 2023 13:16:05 -0400 Subject: [PATCH 05/11] Update trie_test.go --- x/merkledb/trie_test.go | 42 +++++++++++++++++++++++++++++++++-------- 1 file changed, 34 insertions(+), 8 deletions(-) diff --git a/x/merkledb/trie_test.go b/x/merkledb/trie_test.go index 241d54cc26e5..530a2db633fc 100644 --- a/x/merkledb/trie_test.go +++ b/x/merkledb/trie_test.go @@ -1195,20 +1195,29 @@ func Test_Trie_CommitToParentView_Concurrent(t *testing.T) { err = parentView.Insert(context.Background(), []byte{0}, []byte{0}) require.NoError(t, err) - childView, err := parentView.NewView() + childView1, err := parentView.NewView() require.NoError(t, err) - err = childView.Insert(context.Background(), []byte{1}, []byte{1}) + err = childView1.Insert(context.Background(), []byte{1}, []byte{1}) + require.NoError(t, err) + + childView2, err := childView1.NewView() + require.NoError(t, err) + err = childView2.Insert(context.Background(), []byte{2}, []byte{2}) require.NoError(t, err) var wg sync.WaitGroup - wg.Add(2) + wg.Add(3) go func() { defer wg.Done() require.NoError(t, parentView.CommitToParent(context.Background())) }() go func() { defer wg.Done() - go require.NoError(t, childView.CommitToParent(context.Background())) + require.NoError(t, childView1.CommitToParent(context.Background())) + }() + go func() { + defer wg.Done() + require.NoError(t, childView2.CommitToParent(context.Background())) }() wg.Wait() @@ -1220,6 +1229,10 @@ func Test_Trie_CommitToParentView_Concurrent(t *testing.T) { val1, err := baseView.GetValue(context.Background(), []byte{1}) require.NoError(t, err) require.Equal(t, []byte{1}, val1) + + val2, err := baseView.GetValue(context.Background(), []byte{2}) + require.NoError(t, err) + require.Equal(t, []byte{2}, val2) } } @@ -1234,20 +1247,29 @@ func Test_Trie_CommitToParentDB_Concurrent(t *testing.T) { err = parentView.Insert(context.Background(), []byte{0}, []byte{0}) require.NoError(t, err) - childView, err := parentView.NewView() + childView1, err := parentView.NewView() require.NoError(t, err) - err = childView.Insert(context.Background(), []byte{1}, []byte{1}) + err = childView1.Insert(context.Background(), []byte{1}, []byte{1}) + require.NoError(t, err) + + childView2, err := childView1.NewView() + require.NoError(t, err) + err = childView2.Insert(context.Background(), []byte{2}, []byte{2}) require.NoError(t, err) var wg sync.WaitGroup - wg.Add(2) + wg.Add(3) go func() { defer wg.Done() require.NoError(t, parentView.CommitToParent(context.Background())) }() go func() { defer wg.Done() - go require.NoError(t, childView.CommitToParent(context.Background())) + require.NoError(t, childView1.CommitToParent(context.Background())) + }() + go func() { + defer wg.Done() + require.NoError(t, childView2.CommitToParent(context.Background())) }() wg.Wait() @@ -1259,6 +1281,10 @@ func Test_Trie_CommitToParentDB_Concurrent(t *testing.T) { val1, err := dbTrie.GetValue(context.Background(), []byte{1}) require.NoError(t, err) require.Equal(t, []byte{1}, val1) + + val2, err := dbTrie.GetValue(context.Background(), []byte{2}) + require.NoError(t, err) + require.Equal(t, []byte{2}, val2) } } From 52979e82acf0e3794d60f4bfd1fdbaa911d44301 Mon Sep 17 00:00:00 2001 From: dboehm-avalabs Date: Tue, 11 Apr 2023 13:37:35 -0400 Subject: [PATCH 06/11] Update trieview.go --- x/merkledb/trieview.go | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/x/merkledb/trieview.go b/x/merkledb/trieview.go index 5aa04cfc5eff..0dd3deec43d7 100644 --- a/x/merkledb/trieview.go +++ b/x/merkledb/trieview.go @@ -567,21 +567,15 @@ func (t *trieView) commitChanges(ctx context.Context, trieToCommit *trieView) er // CommitToParent commits the changes from this view to its parent Trie func (t *trieView) CommitToParent(ctx context.Context) error { - // if we are about to write to the db, then we to hold the commitLock - if t.getParentTrie() == t.db { - t.db.commitLock.Lock() - defer t.db.commitLock.Unlock() - } - t.lock.Lock() defer t.lock.Unlock() - return t.commitToParent(ctx) + return t.commitToParent(ctx, true) } // commitToParent commits the changes from this view to its parent Trie // assumes [t.lock] is held -func (t *trieView) commitToParent(ctx context.Context) error { +func (t *trieView) commitToParent(ctx context.Context, commitLockIfParentDB bool) error { ctx, span := t.db.tracer.Start(ctx, "MerkleDB.trieview.commitToParent") defer span.End() @@ -598,9 +592,17 @@ func (t *trieView) commitToParent(ctx context.Context) error { } // overwrite this view with changes from the incoming view - if err := t.getParentTrie().commitChanges(ctx, t); err != nil { + parent := t.getParentTrie() + if commitLockIfParentDB && parent == t.db { + t.db.commitLock.Lock() + defer t.db.commitLock.Unlock() + } + if err := parent.commitChanges(ctx, t); err != nil { return err } + if t.isInvalid() { + return ErrInvalid + } t.committed = true @@ -620,7 +622,7 @@ func (t *trieView) commitToDB(ctx context.Context) error { defer span.End() // first merge changes into the parent trie - if err := t.commitToParent(ctx); err != nil { + if err := t.commitToParent(ctx, false); err != nil { return err } From 4d35c960a19440cca805a2c6d4e1cc363299dd48 Mon Sep 17 00:00:00 2001 From: dboehm-avalabs Date: Tue, 11 Apr 2023 14:33:06 -0400 Subject: [PATCH 07/11] force serial commits --- x/merkledb/trieview.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/x/merkledb/trieview.go b/x/merkledb/trieview.go index 0dd3deec43d7..ea69c72846b3 100644 --- a/x/merkledb/trieview.go +++ b/x/merkledb/trieview.go @@ -567,6 +567,9 @@ func (t *trieView) commitChanges(ctx context.Context, trieToCommit *trieView) er // CommitToParent commits the changes from this view to its parent Trie func (t *trieView) CommitToParent(ctx context.Context) error { + t.db.commitLock.Lock() + defer t.db.commitLock.Unlock() + t.lock.Lock() defer t.lock.Unlock() From 7f8bbe857b8a3fe8a4c9ae7b21afdff45640a056 Mon Sep 17 00:00:00 2001 From: dboehm-avalabs Date: Tue, 11 Apr 2023 14:34:56 -0400 Subject: [PATCH 08/11] Update trieview.go --- x/merkledb/trieview.go | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/x/merkledb/trieview.go b/x/merkledb/trieview.go index ea69c72846b3..bcfe2782296f 100644 --- a/x/merkledb/trieview.go +++ b/x/merkledb/trieview.go @@ -516,8 +516,6 @@ func (t *trieView) commitChanges(ctx context.Context, trieToCommit *trieView) er case trieToCommit.isInvalid(): // don't apply changes from an invalid view return ErrInvalid - case t.committed: - return t.getParentTrie().commitChanges(ctx, trieToCommit) case trieToCommit.getParentTrie() != t: // trieToCommit needs to be a child of t, otherwise the changes merge would not work return ErrViewIsNotAChild @@ -573,12 +571,12 @@ func (t *trieView) CommitToParent(ctx context.Context) error { t.lock.Lock() defer t.lock.Unlock() - return t.commitToParent(ctx, true) + return t.commitToParent(ctx) } // commitToParent commits the changes from this view to its parent Trie // assumes [t.lock] is held -func (t *trieView) commitToParent(ctx context.Context, commitLockIfParentDB bool) error { +func (t *trieView) commitToParent(ctx context.Context) error { ctx, span := t.db.tracer.Start(ctx, "MerkleDB.trieview.commitToParent") defer span.End() @@ -594,13 +592,7 @@ func (t *trieView) commitToParent(ctx context.Context, commitLockIfParentDB bool return err } - // overwrite this view with changes from the incoming view - parent := t.getParentTrie() - if commitLockIfParentDB && parent == t.db { - t.db.commitLock.Lock() - defer t.db.commitLock.Unlock() - } - if err := parent.commitChanges(ctx, t); err != nil { + if err := t.getParentTrie().commitChanges(ctx, t); err != nil { return err } if t.isInvalid() { @@ -625,7 +617,7 @@ func (t *trieView) commitToDB(ctx context.Context) error { defer span.End() // first merge changes into the parent trie - if err := t.commitToParent(ctx, false); err != nil { + if err := t.commitToParent(ctx); err != nil { return err } From 4131c5d6d82a96d9e9f485b3b9ca2d215c19db88 Mon Sep 17 00:00:00 2001 From: dboehm-avalabs Date: Tue, 11 Apr 2023 14:35:29 -0400 Subject: [PATCH 09/11] Update trieview.go --- x/merkledb/trieview.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/x/merkledb/trieview.go b/x/merkledb/trieview.go index bcfe2782296f..a9c5299aeab0 100644 --- a/x/merkledb/trieview.go +++ b/x/merkledb/trieview.go @@ -513,12 +513,12 @@ func (t *trieView) commitChanges(ctx context.Context, trieToCommit *trieView) er case trieToCommit == nil: // no changes to apply return nil - case trieToCommit.isInvalid(): - // don't apply changes from an invalid view - return ErrInvalid case trieToCommit.getParentTrie() != t: // trieToCommit needs to be a child of t, otherwise the changes merge would not work return ErrViewIsNotAChild + case trieToCommit.isInvalid(): + // don't apply changes from an invalid view + return ErrInvalid } // Invalidate all child views except the view being committed. From fc53d2ebbc0c81d2f9b12f629e17e9af48a1b026 Mon Sep 17 00:00:00 2001 From: dboehm-avalabs Date: Tue, 11 Apr 2023 14:39:03 -0400 Subject: [PATCH 10/11] Update trieview.go --- x/merkledb/trieview.go | 1 + 1 file changed, 1 insertion(+) diff --git a/x/merkledb/trieview.go b/x/merkledb/trieview.go index a9c5299aeab0..45d54553e0e9 100644 --- a/x/merkledb/trieview.go +++ b/x/merkledb/trieview.go @@ -592,6 +592,7 @@ func (t *trieView) commitToParent(ctx context.Context) error { return err } + // write this view's changes into its parent if err := t.getParentTrie().commitChanges(ctx, t); err != nil { return err } From debdebcb1e0f69c0e85b5c27877de76883c19fa6 Mon Sep 17 00:00:00 2001 From: dboehm-avalabs Date: Tue, 11 Apr 2023 15:09:48 -0400 Subject: [PATCH 11/11] Update trieview.go --- x/merkledb/trieview.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/x/merkledb/trieview.go b/x/merkledb/trieview.go index 45d54553e0e9..a1fc676d2b9a 100644 --- a/x/merkledb/trieview.go +++ b/x/merkledb/trieview.go @@ -565,6 +565,8 @@ func (t *trieView) commitChanges(ctx context.Context, trieToCommit *trieView) er // CommitToParent commits the changes from this view to its parent Trie func (t *trieView) CommitToParent(ctx context.Context) error { + // TODO: Only lock the commitlock when the parent is the DB + // TODO: fix concurrency bugs with CommitToParent t.db.commitLock.Lock() defer t.db.commitLock.Unlock()