From 680cedea68be2b88dc4e8d9cbf0db043e2dcde9f Mon Sep 17 00:00:00 2001 From: Roman Date: Thu, 17 Feb 2022 20:01:04 -0800 Subject: [PATCH 1/6] force GC, no cache during migration, auto heap profile --- mutable_tree.go | 69 ++++++++++++++++++++++++++++++++++++++++++++++++- nodedb.go | 18 +++++++++---- 2 files changed, 81 insertions(+), 6 deletions(-) diff --git a/mutable_tree.go b/mutable_tree.go index 0c14af666..9e47cc041 100644 --- a/mutable_tree.go +++ b/mutable_tree.go @@ -4,8 +4,12 @@ import ( "bytes" "crypto/sha256" "fmt" + "os" + "runtime" + "runtime/pprof" "sort" "sync" + "time" "github.com/pkg/errors" @@ -534,6 +538,9 @@ func (tree *MutableTree) enableFastStorageAndCommitIfNotEnabled() (bool, error) fastItr.Close() } + // Force garbage collection before we proceed to enabling fast storage. + runtime.GC() + if err := tree.enableFastStorageAndCommit(); err != nil { tree.ndb.storageVersion = defaultStorageVersionValue return false, err @@ -558,23 +565,83 @@ func (tree *MutableTree) enableFastStorageAndCommit() error { } }() + done := make(chan struct{}) + defer func() { + fmt.Println("signaling done") + done <- struct{}{} + close(done) + fmt.Println("closed") + }() + + go func () { + timer := time.NewTimer(time.Second) + defer func () { + if !timer.Stop() { + fmt.Println("stop") + <-timer.C + } + }() + + var m runtime.MemStats + + hasTakenHeapProfile := false + + for { + // Sample the current memory usage + runtime.ReadMemStats(&m) + + if m.Alloc > 4 * 1024 * 1024 * 1024 { + // If we are using more than 4GB of memory, we should trigger garbage collection + // to free up some memory. + fmt.Println("gc") + runtime.GC() + } + + if !hasTakenHeapProfile && m.Alloc > 8 * 1024 * 1024 * 1024 { + // If we are using more than 8GB of memory, we should write a pprof sample + fmt.Println("pprof heap") + time := time.Now() + heapProfilePath := "/tmp/heap_profile" + time.String() + ".pprof" + heapFile, _ := os.Create(heapProfilePath) + pprof.WriteHeapProfile(heapFile) + heapFile.Close() + hasTakenHeapProfile = true + } + + select { + case <-timer.C: + fmt.Println("reset") + timer.Reset(time.Second) + case <-done: + fmt.Println("done") + return + } + } + }() + itr := NewIterator(nil, nil, true, tree.ImmutableTree) defer itr.Close() for ; itr.Valid(); itr.Next() { - if err = tree.ndb.SaveFastNode(NewFastNode(itr.Key(), itr.Value(), tree.version)); err != nil { + if err = tree.ndb.SaveFastNodeNoCache(NewFastNode(itr.Key(), itr.Value(), tree.version)); err != nil { + fmt.Println("save error") return err } } + fmt.Println("finished") + if err = itr.Error(); err != nil { + fmt.Println("itr error") return err } if err = tree.ndb.setFastStorageVersionToBatch(); err != nil { + fmt.Println("save version error") return err } if err = tree.ndb.Commit(); err != nil { + fmt.Println("commit error") return err } return nil diff --git a/nodedb.go b/nodedb.go index c1843a7b4..7bee811c5 100644 --- a/nodedb.go +++ b/nodedb.go @@ -215,11 +215,18 @@ func (ndb *nodeDB) SaveNode(node *Node) { ndb.cacheNode(node) } -// SaveNode saves a FastNode to disk. +// SaveNode saves a FastNode to disk and add to cache. func (ndb *nodeDB) SaveFastNode(node *FastNode) error { ndb.mtx.Lock() defer ndb.mtx.Unlock() - return ndb.saveFastNodeUnlocked(node) + return ndb.saveFastNodeUnlocked(node, true) +} + +// SaveNode saves a FastNode to disk without adding to cache. +func (ndb *nodeDB) SaveFastNodeNoCache(node *FastNode) error { + ndb.mtx.Lock() + defer ndb.mtx.Unlock() + return ndb.saveFastNodeUnlocked(node, false) } // setFastStorageVersionToBatch sets storage version to fast where the version is @@ -274,7 +281,7 @@ func (ndb *nodeDB) shouldForceFastStorageUpgrade() bool { } // SaveNode saves a FastNode to disk. -func (ndb *nodeDB) saveFastNodeUnlocked(node *FastNode) error { +func (ndb *nodeDB) saveFastNodeUnlocked(node *FastNode, shouldAddToCache bool) error { if node.key == nil { return fmt.Errorf("FastNode cannot have a nil value for key") } @@ -290,8 +297,9 @@ func (ndb *nodeDB) saveFastNodeUnlocked(node *FastNode) error { if err := ndb.batch.Set(ndb.fastNodeKey(node.key), buf.Bytes()); err != nil { return fmt.Errorf("error while writing key/val to nodedb batch. Err: %w", err) } - debug("BATCH SAVE %X %p\n", node.key, node) - ndb.cacheFastNode(node) + if shouldAddToCache { + ndb.cacheFastNode(node) + } return nil } From d7810708ba7e5cda318c1f6c7b46a1b7d5f06004 Mon Sep 17 00:00:00 2001 From: Roman Date: Thu, 17 Feb 2022 20:19:59 -0800 Subject: [PATCH 2/6] resolve a potential deadlock from racing between reset and stop --- mutable_tree.go | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/mutable_tree.go b/mutable_tree.go index 9e47cc041..6013b7b53 100644 --- a/mutable_tree.go +++ b/mutable_tree.go @@ -575,13 +575,6 @@ func (tree *MutableTree) enableFastStorageAndCommit() error { go func () { timer := time.NewTimer(time.Second) - defer func () { - if !timer.Stop() { - fmt.Println("stop") - <-timer.C - } - }() - var m runtime.MemStats hasTakenHeapProfile := false @@ -610,9 +603,12 @@ func (tree *MutableTree) enableFastStorageAndCommit() error { select { case <-timer.C: - fmt.Println("reset") timer.Reset(time.Second) case <-done: + if !timer.Stop() { + fmt.Println("stop") + <-timer.C + } fmt.Println("done") return } @@ -644,6 +640,8 @@ func (tree *MutableTree) enableFastStorageAndCommit() error { fmt.Println("commit error") return err } + + fmt.Println("finished clean") return nil } From e41f1b6b471ee2e47b2444ccc8608899fd8f7885 Mon Sep 17 00:00:00 2001 From: Roman Date: Fri, 18 Feb 2022 07:50:37 -0800 Subject: [PATCH 3/6] fix small lint issue --- mutable_tree.go | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/mutable_tree.go b/mutable_tree.go index 6013b7b53..1d3d4180d 100644 --- a/mutable_tree.go +++ b/mutable_tree.go @@ -532,10 +532,12 @@ func (tree *MutableTree) enableFastStorageAndCommitIfNotEnabled() (bool, error) // Therefore, there might exist stale fast nodes on disk. As a result, to avoid persisting the stale state, it might // be worth to delete the fast nodes from disk. fastItr := NewFastIterator(nil, nil, true, tree.ndb) + defer fastItr.Close() for ; fastItr.Valid(); fastItr.Next() { - tree.ndb.DeleteFastNode(fastItr.Key()) + if err := tree.ndb.DeleteFastNode(fastItr.Key()); err != nil { + return false, err + } } - fastItr.Close() } // Force garbage collection before we proceed to enabling fast storage. @@ -590,7 +592,7 @@ func (tree *MutableTree) enableFastStorageAndCommit() error { runtime.GC() } - if !hasTakenHeapProfile && m.Alloc > 8 * 1024 * 1024 * 1024 { + if !hasTakenHeapProfile && m.Alloc > 4 * 1024 * 1024 * 1024 { // If we are using more than 8GB of memory, we should write a pprof sample fmt.Println("pprof heap") time := time.Now() From ac57696bf821d9d40571ee46a0ce16dd84c5ff8d Mon Sep 17 00:00:00 2001 From: Roman Date: Fri, 18 Feb 2022 08:24:26 -0800 Subject: [PATCH 4/6] remove logs and pprof logic --- mutable_tree.go | 26 -------------------------- 1 file changed, 26 deletions(-) diff --git a/mutable_tree.go b/mutable_tree.go index 1d3d4180d..abda3ad3a 100644 --- a/mutable_tree.go +++ b/mutable_tree.go @@ -569,18 +569,14 @@ func (tree *MutableTree) enableFastStorageAndCommit() error { done := make(chan struct{}) defer func() { - fmt.Println("signaling done") done <- struct{}{} close(done) - fmt.Println("closed") }() go func () { timer := time.NewTimer(time.Second) var m runtime.MemStats - hasTakenHeapProfile := false - for { // Sample the current memory usage runtime.ReadMemStats(&m) @@ -588,30 +584,16 @@ func (tree *MutableTree) enableFastStorageAndCommit() error { if m.Alloc > 4 * 1024 * 1024 * 1024 { // If we are using more than 4GB of memory, we should trigger garbage collection // to free up some memory. - fmt.Println("gc") runtime.GC() } - if !hasTakenHeapProfile && m.Alloc > 4 * 1024 * 1024 * 1024 { - // If we are using more than 8GB of memory, we should write a pprof sample - fmt.Println("pprof heap") - time := time.Now() - heapProfilePath := "/tmp/heap_profile" + time.String() + ".pprof" - heapFile, _ := os.Create(heapProfilePath) - pprof.WriteHeapProfile(heapFile) - heapFile.Close() - hasTakenHeapProfile = true - } - select { case <-timer.C: timer.Reset(time.Second) case <-done: if !timer.Stop() { - fmt.Println("stop") <-timer.C } - fmt.Println("done") return } } @@ -621,29 +603,21 @@ func (tree *MutableTree) enableFastStorageAndCommit() error { defer itr.Close() for ; itr.Valid(); itr.Next() { if err = tree.ndb.SaveFastNodeNoCache(NewFastNode(itr.Key(), itr.Value(), tree.version)); err != nil { - fmt.Println("save error") return err } } - fmt.Println("finished") - if err = itr.Error(); err != nil { - fmt.Println("itr error") return err } if err = tree.ndb.setFastStorageVersionToBatch(); err != nil { - fmt.Println("save version error") return err } if err = tree.ndb.Commit(); err != nil { - fmt.Println("commit error") return err } - - fmt.Println("finished clean") return nil } From 2b4e8f570edd2c4ea5539b0300707ba2896eea1e Mon Sep 17 00:00:00 2001 From: Roman Date: Fri, 18 Feb 2022 08:30:22 -0800 Subject: [PATCH 5/6] remove unused libraries --- mutable_tree.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/mutable_tree.go b/mutable_tree.go index abda3ad3a..4be398975 100644 --- a/mutable_tree.go +++ b/mutable_tree.go @@ -4,9 +4,7 @@ import ( "bytes" "crypto/sha256" "fmt" - "os" "runtime" - "runtime/pprof" "sort" "sync" "time" From 3a68941b722ba5c148769959cad3eab6bc87e10d Mon Sep 17 00:00:00 2001 From: Roman Date: Sat, 19 Feb 2022 17:33:00 -0800 Subject: [PATCH 6/6] add comment explaining the reason for RAM optimizations --- mutable_tree.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mutable_tree.go b/mutable_tree.go index 4be398975..078c6eb2b 100644 --- a/mutable_tree.go +++ b/mutable_tree.go @@ -565,6 +565,9 @@ func (tree *MutableTree) enableFastStorageAndCommit() error { } }() + // We start a new thread to keep on checking if we are above 4GB, and if so garbage collect. + // This thread only lasts during the fast node migration. + // This is done to keep RAM usage down. done := make(chan struct{}) defer func() { done <- struct{}{}