Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix cache_kv_store n^2 problem #17

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 29 additions & 12 deletions store/cachekv/memiterator.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package cachekv

import (
"bytes"
"errors"

dbm "github.com/tendermint/tm-db"
Expand All @@ -17,23 +18,39 @@ type memIterator struct {
ascending bool
}

func newMemIterator(start, end []byte, items *kv.List, ascending bool) *memIterator {
itemsInDomain := make([]*kv.Pair, 0, items.Len())

var entered bool
func newMemIterator(start, end []byte, sortedItems *kv.List, ascending bool) *memIterator {
itemsInDomain := make([]*kv.Pair, 0, sortedItems.Len())

// Old code, that can generously be called optimized for small sorted cache sizes.
// (Notable perf issue exists for dbm.IsKeyInDomain repeating the start compare)
// This has been checked to yield the same result as the other case for larger inputs
if sortedItems.Len() <= 64 {
var entered bool
for e := sortedItems.Front(); e != nil; e = e.Next() {
item := e.Value
if !dbm.IsKeyInDomain(item.Key, start, end) {
if entered {
break
}

continue
}

for e := items.Front(); e != nil; e = e.Next() {
item := e.Value
if !dbm.IsKeyInDomain(item.Key, start, end) {
if entered {
itemsInDomain = append(itemsInDomain, item)
entered = true
}
} else {
// Code that handles large cache sizes.
// Find start point of range.
startElem := sortedItems.SortedSearch(start)
for e := startElem; e != nil; e = e.Next() {
item := e.Value
if end != nil && bytes.Compare(end, item.Key) <= 0 {
break
}

continue
itemsInDomain = append(itemsInDomain, item)
}

itemsInDomain = append(itemsInDomain, item)
entered = true
}

return &memIterator{
Expand Down
40 changes: 29 additions & 11 deletions store/cachekv/store.go
Original file line number Diff line number Diff line change
Expand Up @@ -200,18 +200,8 @@ func byteSliceToStr(b []byte) string {
return *(*string)(unsafe.Pointer(hdr))
}

// Constructs a slice of dirty items, to use w/ memIterator.
func (store *Store) dirtyItems(start, end []byte) {
unsorted := make([]*kv.Pair, 0)

func (store *Store) clearUnsortedCacheSubset(unsorted []*kv.Pair) {
n := len(store.unsortedCache)
for key := range store.unsortedCache {
if dbm.IsKeyInDomain(strToByte(key), start, end) {
cacheValue := store.cache[key]
unsorted = append(unsorted, &kv.Pair{Key: []byte(key), Value: cacheValue.value})
}
}

if len(unsorted) == n { // This pattern allows the Go compiler to emit the map clearing idiom for the entire map.
for key := range store.unsortedCache {
delete(store.unsortedCache, key)
Expand Down Expand Up @@ -245,11 +235,39 @@ func (store *Store) dirtyItems(start, end []byte) {
}
}

// push remaining unsorted items to end of sortedCache
for _, kvp := range unsorted {
store.sortedCache.PushBack(kvp)
}
}

// dirtyItems gathers entries of store.unsortedCache as kv.Pairs, for use with
// memIterator, and hands the gathered slice to clearUnsortedCacheSubset.
//
// For a small unsorted cache only the keys that dbm.IsKeyInDomain reports as
// lying within (start, end) are gathered. Once the unsorted cache holds
// flushAllThreshold entries or more, every entry is gathered regardless of
// the requested range.
func (store *Store) dirtyItems(start, end []byte) {
	// When the unsorted cache is this large, it costs too much to work out
	// which entries fall inside the range we are concerned about. If iterator
	// calls are interleaved with writes, the repeated range checks can easily
	// become an O(N^2) overhead; even without that, too many range checks
	// eventually cost more than simply not having the cache.
	const flushAllThreshold = 1024

	n := len(store.unsortedCache)
	takeAll := n >= flushAllThreshold

	// At most n pairs are appended (exactly n when takeAll is set), so the
	// backing array is reserved once instead of being regrown by append.
	unsorted := make([]*kv.Pair, 0, n)
	for key := range store.unsortedCache {
		// Small cache: linear scan that keeps only the keys inside the domain.
		if !takeAll && !dbm.IsKeyInDomain(strToByte(key), start, end) {
			continue
		}

		cacheValue := store.cache[key]
		unsorted = append(unsorted, &kv.Pair{Key: []byte(key), Value: cacheValue.value})
	}

	store.clearUnsortedCacheSubset(unsorted)
}

//----------------------------------------
// etc

Expand Down
5 changes: 5 additions & 0 deletions store/cachekv/store_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -354,23 +354,28 @@ func doRandomOp(t *testing.T, st types.CacheKVStore, truth dbm.DB, maxKey int) {
switch r {
case opSet:
k := randInt(maxKey)
fmt.Println("opSet", k)
st.Set(keyFmt(k), valFmt(k))
err := truth.Set(keyFmt(k), valFmt(k))
require.NoError(t, err)
case opSetRange:
start := randInt(maxKey - 2)
end := randInt(maxKey-start) + start
fmt.Println("opSetRange", start, end)
setRange(t, st, truth, start, end)
case opDel:
k := randInt(maxKey)
fmt.Println("opDel", k)
st.Delete(keyFmt(k))
err := truth.Delete(keyFmt(k))
require.NoError(t, err)
case opDelRange:
start := randInt(maxKey - 2)
end := randInt(maxKey-start) + start
fmt.Println("opDelRange", start, end)
deleteRange(t, st, truth, start, end)
case opWrite:
fmt.Println("opWrite")
st.Write()
}
}
Expand Down
72 changes: 72 additions & 0 deletions types/kv/list.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
package kv

import (
"bytes"
)

// This code was copied from golang.org/pkg/container/list, but specially adapted
// for use with kv.Pair to avoid the type assertion CPU expense of using Value with
// an interface, per https://github.com/cosmos/cosmos-sdk/issues/8810
Expand Down Expand Up @@ -40,6 +44,28 @@ func (e *Element) Prev() *Element {
return nil
}

// seekPosition walks from e, which sits at index curPos (with root.Next()
// being index 0), to the element at index targetPos and returns it.
//
// It steps forward with Next when targetPos >= curPos and backward with Prev
// otherwise, so it performs |targetPos - curPos| link traversals. A nil
// receiver yields nil. If Next or Prev returns nil before targetPos is
// reached (i.e. the target lies outside the list), the walk stops and nil is
// returned instead of dereferencing a nil element.
func (e *Element) seekPosition(curPos int, targetPos int) *Element {
	cur := e

	// At most one of the two loops runs. The cur != nil guard covers both a
	// nil receiver and a walk that runs off the end of the list.
	for ; cur != nil && curPos < targetPos; curPos++ {
		cur = cur.Next()
	}
	for ; cur != nil && curPos > targetPos; curPos-- {
		cur = cur.Prev()
	}

	return cur
}

// List represents a doubly linked list.
// The zero value for List is an empty list ready to use.
type List struct {
Expand Down Expand Up @@ -234,3 +260,49 @@ func (l *List) PushFrontList(other *List) {
l.insertValue(e.Value, &l.root)
}
}

// SortedSearch returns the first element whose key is >= start.
// It assumes the list is sorted by key.
//
// Probe positions are chosen exactly as the standard library's sort.Search
// chooses them, with f(i) defined as bytes.Compare(l[i].Key, start) >= 0.
// Because a linked list has no random access, each probe is reached by
// walking from the current lower bound with seekPosition.
//
// When no element qualifies, the result is whatever Next yields after the
// last element; for an empty list it is whatever Front yields.
func (l *List) SortedSearch(start []byte) *Element {
	// Invariant (as in sort.Search): f(lo-1) == false and f(hi) == true,
	// taking f(-1) == false and f(l.len) == true. loElem always tracks the
	// element at index lo.
	lo, hi := 0, l.len
	loElem := l.Front()

	for lo < hi {
		mid := int(uint(lo+hi) >> 1) // lo <= mid < hi; uint avoids overflow
		midElem := loElem.seekPosition(lo, mid)

		if bytes.Compare(midElem.Value.Key, start) >= 0 {
			// f(mid) holds: the answer is at mid or before it.
			hi = mid
			continue
		}

		// f(mid) is false: the answer lies strictly after mid.
		lo = mid + 1
		loElem = midElem.Next()
	}

	// lo == hi, f(lo-1) == false and f(hi) == true, so the answer is index lo.
	return loElem
}