From d57e36dc059fa0433a421ca54487433de6736f2c Mon Sep 17 00:00:00 2001
From: ValarDragon
Date: Fri, 23 Jul 2021 20:35:32 -0500
Subject: [PATCH 1/4] Fix cache_kv_store n^2 problem

---
 store/cachekv/store.go | 39 ++++++++++++++++++++++++++++-----------
 1 file changed, 28 insertions(+), 11 deletions(-)

diff --git a/store/cachekv/store.go b/store/cachekv/store.go
index cd7087c5dc7b..cd2f52aa1ffe 100644
--- a/store/cachekv/store.go
+++ b/store/cachekv/store.go
@@ -200,18 +200,8 @@ func byteSliceToStr(b []byte) string {
 	return *(*string)(unsafe.Pointer(hdr))
 }
 
-// Constructs a slice of dirty items, to use w/ memIterator.
-func (store *Store) dirtyItems(start, end []byte) {
-	unsorted := make([]*kv.Pair, 0)
-
+func (store *Store) clearUnsortedCacheSubset(unsorted []*kv.Pair) {
 	n := len(store.unsortedCache)
-	for key := range store.unsortedCache {
-		if dbm.IsKeyInDomain(strToByte(key), start, end) {
-			cacheValue := store.cache[key]
-			unsorted = append(unsorted, &kv.Pair{Key: []byte(key), Value: cacheValue.value})
-		}
-	}
-
 	if len(unsorted) == n { // This pattern allows the Go compiler to emit the map clearing idiom for the entire map.
 		for key := range store.unsortedCache {
 			delete(store.unsortedCache, key)
@@ -250,6 +240,33 @@ func (store *Store) dirtyItems(start, end []byte) {
 	}
 }
 
+// Constructs a slice of dirty items, to use w/ memIterator.
+func (store *Store) dirtyItems(start, end []byte) {
+	n := len(store.unsortedCache)
+	unsorted := make([]*kv.Pair, 0)
+	// If the unsortedCache is too big, it costs too much to determine
+	// what's in the subset we are concerned about.
+	// If you are interleaving iterator calls with writes, this can easily become an
+	// O(N^2) overhead.
+	// Even without that, too many range checks eventually become more expensive
+	// than just not having the cache.
+	if n >= 1024 {
+		for key := range store.unsortedCache {
+			cacheValue := store.cache[key]
+			unsorted = append(unsorted, &kv.Pair{Key: []byte(key), Value: cacheValue.value})
+		}
+	} else {
+		// else do a linear scan to determine if the unsorted pairs are in the pool.
+		for key := range store.unsortedCache {
+			if dbm.IsKeyInDomain(strToByte(key), start, end) {
+				cacheValue := store.cache[key]
+				unsorted = append(unsorted, &kv.Pair{Key: []byte(key), Value: cacheValue.value})
+			}
+		}
+	}
+	store.clearUnsortedCacheSubset(unsorted)
+}
+
 //----------------------------------------
 // etc

From 521fa2ffa3ddb5a4ea81c3d4e701189b5fe0658c Mon Sep 17 00:00:00 2001
From: ValarDragon
Date: Fri, 23 Jul 2021 23:32:30 -0500
Subject: [PATCH 2/4] Use a binary search on the linked list to find the start of the range

---
 store/cachekv/memiterator.go | 41 ++++++++++++++------
 store/cachekv/store_test.go  |  5 +++
 types/kv/list.go             | 72 ++++++++++++++++++++++++++++++++++++
 3 files changed, 106 insertions(+), 12 deletions(-)

diff --git a/store/cachekv/memiterator.go b/store/cachekv/memiterator.go
index b197ac141660..268b5a2c97b7 100644
--- a/store/cachekv/memiterator.go
+++ b/store/cachekv/memiterator.go
@@ -1,6 +1,7 @@
 package cachekv
 
 import (
+	"bytes"
 	"errors"
 
 	dbm "github.com/tendermint/tm-db"
@@ -17,23 +18,39 @@ type memIterator struct {
 	ascending bool
 }
 
-func newMemIterator(start, end []byte, items *kv.List, ascending bool) *memIterator {
-	itemsInDomain := make([]*kv.Pair, 0, items.Len())
-
-	var entered bool
+func newMemIterator(start, end []byte, sortedItems *kv.List, ascending bool) *memIterator {
+	itemsInDomain := make([]*kv.Pair, 0, sortedItems.Len())
+
+	// Old code that can generously be called optimized for small sorted cache sizes.
+	// (Notable perf issue exists for dbm.IsKeyInDomain repeating the start compare)
+	// This has been checked to yield the same result as the other case for larger inputs.
+	if sortedItems.Len() <= 64 {
+		var entered bool
+		for e := sortedItems.Front(); e != nil; e = e.Next() {
+			item := e.Value
+			if !dbm.IsKeyInDomain(item.Key, start, end) {
+				if entered {
+					break
+				}
+
+				continue
+			}
 
-	for e := items.Front(); e != nil; e = e.Next() {
-		item := e.Value
-		if !dbm.IsKeyInDomain(item.Key, start, end) {
-			if entered {
+			itemsInDomain = append(itemsInDomain, item)
+			entered = true
+		}
+	} else {
+		// Code that handles large cache sizes.
+		// Find start point of range.
+		startElem := sortedItems.SortedSearch(start)
+		for e := startElem; e != nil; e = e.Next() {
+			item := e.Value
+			if end != nil && bytes.Compare(end, item.Key) <= 0 {
 				break
 			}
 
-			continue
+			itemsInDomain = append(itemsInDomain, item)
 		}
-
-		itemsInDomain = append(itemsInDomain, item)
-		entered = true
 	}
 
 	return &memIterator{
diff --git a/store/cachekv/store_test.go b/store/cachekv/store_test.go
index e3b33341b8c4..736786465b90 100644
--- a/store/cachekv/store_test.go
+++ b/store/cachekv/store_test.go
@@ -354,23 +354,28 @@ func doRandomOp(t *testing.T, st types.CacheKVStore, truth dbm.DB, maxKey int) {
 	switch r {
 	case opSet:
 		k := randInt(maxKey)
+		fmt.Println("opSet", k)
 		st.Set(keyFmt(k), valFmt(k))
 		err := truth.Set(keyFmt(k), valFmt(k))
 		require.NoError(t, err)
 	case opSetRange:
 		start := randInt(maxKey - 2)
 		end := randInt(maxKey-start) + start
+		fmt.Println("opSetRange", start, end)
 		setRange(t, st, truth, start, end)
 	case opDel:
 		k := randInt(maxKey)
+		fmt.Println("opDel", k)
 		st.Delete(keyFmt(k))
 		err := truth.Delete(keyFmt(k))
 		require.NoError(t, err)
 	case opDelRange:
 		start := randInt(maxKey - 2)
 		end := randInt(maxKey-start) + start
+		fmt.Println("opDelRange", start, end)
 		deleteRange(t, st, truth, start, end)
 	case opWrite:
+		fmt.Println("opWrite")
 		st.Write()
 	}
 }
diff --git a/types/kv/list.go b/types/kv/list.go
index 9e928c84912b..ff29a38aa103 100644
--- a/types/kv/list.go
+++ b/types/kv/list.go
@@ -1,5 +1,9 @@
 package kv
 
+import (
+	"bytes"
+)
+
 // This code was copied from golang.org/pkg/container/list, but specially adapted
 // for use with kv.Pair to avoid the type assertion CPU expense of using Value with
 // an interface, per https://github.com/cosmos/cosmos-sdk/issues/8810
@@ -40,6 +44,28 @@ func (e *Element) Prev() *Element {
 	return nil
 }
 
+// seekPosition takes a node's current position (with root.Next() being 0)
+// and returns the element at the target position within the list.
+func (e *Element) seekPosition(curPos int, targetPos int) *Element {
+	if e == nil {
+		return e
+	}
+	// fmt.Println(e.list.len, curPos, targetPos)
+	if targetPos >= curPos {
+		cur := e
+		for i := 0; i < (targetPos - curPos); i++ {
+			cur = cur.Next()
+		}
+		return cur
+	} else {
+		cur := e
+		for i := 0; i < (curPos - targetPos); i++ {
+			cur = cur.Prev()
+		}
+		return cur
+	}
+}
+
 // List represents a doubly linked list.
 // The zero value for List is an empty list ready to use.
 type List struct {
@@ -234,3 +260,49 @@ func (l *List) PushFrontList(other *List) {
 		l.insertValue(e.Value, &l.root)
 	}
 }
+
+// SortedSearch searches for the first element with a key >= the argument.
+// It assumes the list is sorted.
+// This mimics a binary search in how it chooses what nodes to compare on,
+// but traverses the list to get to the next node.
+func (l *List) SortedSearch(start []byte) *Element {
+	// We copy the golang search logic here
+	// func Search(n int, f func(int) bool) int {
+	// 	// Define f(-1) == false and f(n) == true.
+	// 	// Invariant: f(i-1) == false, f(j) == true.
+	// 	i, j := 0, n
+	// 	for i < j {
+	// 		h := int(uint(i+j) >> 1) // avoid overflow when computing h
+	// 		// i ≤ h < j
+	// 		if !f(h) {
+	// 			i = h + 1 // preserves f(i-1) == false
+	// 		} else {
+	// 			j = h // preserves f(j) == true
+	// 		}
+	// 	}
+	// 	// i == j, f(i-1) == false, and f(j) (= f(i)) == true => answer is i.
+	// 	return i
+	// }
+	i_index, i_elem, j := 0, l.Front(), l.len
+	// quick check for whether it's the first element
+	// if bytes.Compare(i_elem.Value.Key, start) >= 0 {
+	// 	return i_elem
+	// }
+
+	for i_index < j {
+		h := int(uint(i_index+j) >> 1) // avoid overflow when computing h
+		h_elem := i_elem.seekPosition(i_index, h)
+		// fmt.Println("h key", h_elem.Value.Key)
+		// i ≤ h < j
+		// f(index) in our case is bytes.Compare(l[index], start) >= 0
+		// !f(index) is bytes.Compare(l[index], start) < 0
+		if bytes.Compare(h_elem.Value.Key, start) < 0 {
+			i_index = h + 1 // preserves f(i-1) == false
+			i_elem = h_elem.Next()
+		} else {
+			j = h // preserves f(j) == true
+		}
+	}
+	// i == j, f(i-1) == false, and f(j) (= f(i)) == true => answer is i.
+	return i_elem
+}

From 8c99dda97d6cd9edf3b2efb9f5f56a30be711f20 Mon Sep 17 00:00:00 2001
From: ValarDragon
Date: Sat, 24 Jul 2021 10:55:02 -0500
Subject: [PATCH 3/4] Dump cache to disk when it's too big

---
 store/cachekv/store.go | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/store/cachekv/store.go b/store/cachekv/store.go
index cd2f52aa1ffe..789af49e3412 100644
--- a/store/cachekv/store.go
+++ b/store/cachekv/store.go
@@ -80,6 +80,10 @@ func (store *Store) Set(key []byte, value []byte) {
 	types.AssertValidValue(value)
 
 	store.setCacheValue(key, value, false, true)
+
+	if len(store.cache) >= 5012 {
+		store.Write()
+	}
 }
 
 // Has implements types.KVStore.
@@ -235,6 +239,7 @@ func (store *Store) clearUnsortedCacheSubset(unsorted []*kv.Pair) {
 		}
 	}
 
+	// push remaining unsorted items to end of sortedCache
 	for _, kvp := range unsorted {
 		store.sortedCache.PushBack(kvp)
 	}

From 7d5063b743058c8e2a5ac74e431a2b3d151c59ae Mon Sep 17 00:00:00 2001
From: ValarDragon
Date: Sat, 24 Jul 2021 11:59:34 -0500
Subject: [PATCH 4/4] Revert Write() change

---
 store/cachekv/store.go | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/store/cachekv/store.go b/store/cachekv/store.go
index 789af49e3412..7e5d9f348f80 100644
--- a/store/cachekv/store.go
+++ b/store/cachekv/store.go
@@ -80,10 +80,6 @@ func (store *Store) Set(key []byte, value []byte) {
 	types.AssertValidValue(value)
 
 	store.setCacheValue(key, value, false, true)
-
-	if len(store.cache) >= 5012 {
-		store.Write()
-	}
 }
 
 // Has implements types.KVStore.
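
Context for reviewers, not part of the patches above: the access pattern the first commit targets is interleaving writes with iterator creation, since each new iterator has to fold the dirty unsortedCache into the sortedCache. A rough benchmark sketch of that pattern is below; it assumes the construction the SDK's own cachekv tests use (cachekv.NewStore over a dbadapter.Store backed by a tm-db MemDB) and is only illustrative, not code added by these commits.

package cachekv_test

import (
	"fmt"
	"testing"

	dbm "github.com/tendermint/tm-db"

	"github.com/cosmos/cosmos-sdk/store/cachekv"
	"github.com/cosmos/cosmos-sdk/store/dbadapter"
)

// BenchmarkInterleavedSetIterate interleaves single writes with iterator
// creation. Before these patches, every new iterator re-scanned the entire
// unsortedCache, so N interleaved operations did O(N^2) total work.
func BenchmarkInterleavedSetIterate(b *testing.B) {
	st := cachekv.NewStore(dbadapter.Store{DB: dbm.NewMemDB()})

	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		// One write into the dirty (unsorted) cache...
		k := []byte(fmt.Sprintf("key-%010d", i))
		st.Set(k, k)

		// ...immediately followed by opening an iterator, which forces the
		// dirty entries to be merged into the sorted cache.
		it := st.Iterator(nil, nil)
		if it.Valid() {
			_ = it.Key()
		}
		it.Close()
	}
}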