osmosis-labs · ValarDragon · Jul 24, 2021 · Jul 24, 2021 · Jul 24, 2021 · Jul 24, 2021
@@ -1,6 +1,7 @@
 package cachekv
 
 import (
+	"bytes"
 	"errors"
 
 	dbm "github.com/tendermint/tm-db"
@@ -17,23 +18,39 @@ type memIterator struct {
 	ascending  bool
 }
 
-func newMemIterator(start, end []byte, items *kv.List, ascending bool) *memIterator {
-	itemsInDomain := make([]*kv.Pair, 0, items.Len())
-
-	var entered bool
+func newMemIterator(start, end []byte, sortedItems *kv.List, ascending bool) *memIterator {
+	itemsInDomain := make([]*kv.Pair, 0, sortedItems.Len())
+
+	// Old code, that can generously be called optimized for small sorted cache sizes.
+	// (Notable perf issue exists for dbm.IsKeyInDomain repeating the start compare)
+	// This has been checked to yield the same result as the other case for larger inputs
+	if sortedItems.Len() <= 64 {
+		var entered bool
+		for e := sortedItems.Front(); e != nil; e = e.Next() {
+			item := e.Value
+			if !dbm.IsKeyInDomain(item.Key, start, end) {
+				if entered {
+					break
+				}
+
+				continue
+			}
 
-	for e := items.Front(); e != nil; e = e.Next() {
-		item := e.Value
-		if !dbm.IsKeyInDomain(item.Key, start, end) {
-			if entered {
+			itemsInDomain = append(itemsInDomain, item)
+			entered = true
+		}
+	} else {
+		// Code that handles large cache sizes.
+		// Find start point of range.
+		startElem := sortedItems.SortedSearch(start)
+		for e := startElem; e != nil; e = e.Next() {
+			item := e.Value
+			if end != nil && bytes.Compare(end, item.Key) <= 0 {
 				break
 			}
 
-			continue
+			itemsInDomain = append(itemsInDomain, item)
 		}
-
-		itemsInDomain = append(itemsInDomain, item)
-		entered = true
 	}
 
 	return &memIterator{

@@ -200,18 +200,8 @@ func byteSliceToStr(b []byte) string {
 	return *(*string)(unsafe.Pointer(hdr))
 }
 
-// Constructs a slice of dirty items, to use w/ memIterator.
-func (store *Store) dirtyItems(start, end []byte) {
-	unsorted := make([]*kv.Pair, 0)
-
+func (store *Store) clearUnsortedCacheSubset(unsorted []*kv.Pair) {
 	n := len(store.unsortedCache)
-	for key := range store.unsortedCache {
-		if dbm.IsKeyInDomain(strToByte(key), start, end) {
-			cacheValue := store.cache[key]
-			unsorted = append(unsorted, &kv.Pair{Key: []byte(key), Value: cacheValue.value})
-		}
-	}
-
 	if len(unsorted) == n { // This pattern allows the Go compiler to emit the map clearing idiom for the entire map.
 		for key := range store.unsortedCache {
 			delete(store.unsortedCache, key)
@@ -245,11 +235,39 @@ func (store *Store) dirtyItems(start, end []byte) {
 		}
 	}
 
+	// push remaining unsorted items to end of sortedCache
 	for _, kvp := range unsorted {
 		store.sortedCache.PushBack(kvp)
 	}
 }
 
+// dirtyItems gathers entries of store.unsortedCache as kv.Pairs, to use w/
+// memIterator, and hands them to clearUnsortedCacheSubset. While the unsorted
+// cache is small, only keys that dbm.IsKeyInDomain accepts for (start, end)
+// are gathered; at or above takeAllThreshold every entry is gathered instead.
+func (store *Store) dirtyItems(start, end []byte) {
+	// At or above this many unsorted entries, every entry is collected.
+	const takeAllThreshold = 1024
+
+	n := len(store.unsortedCache)
+	// Range-checking every key of a big unsorted cache costs more than it
+	// saves, and interleaving iterator calls with writes can turn those
+	// repeated scans into O(N^2) work, so a large cache is taken wholesale.
+	takeAll := n >= takeAllThreshold
+	sizeHint := 0
+	if takeAll {
+		sizeHint = n // every key will be appended, so the final length is known
+	}
+	unsorted := make([]*kv.Pair, 0, sizeHint)
+	for key := range store.unsortedCache {
+		if takeAll || dbm.IsKeyInDomain(strToByte(key), start, end) {
+			cacheValue := store.cache[key]
+			unsorted = append(unsorted, &kv.Pair{Key: []byte(key), Value: cacheValue.value})
+		}
+	}
+	store.clearUnsortedCacheSubset(unsorted)
+}
+
 //----------------------------------------
 // etc
 

@@ -354,23 +354,28 @@ func doRandomOp(t *testing.T, st types.CacheKVStore, truth dbm.DB, maxKey int) {
 	switch r {
 	case opSet:
 		k := randInt(maxKey)
+		fmt.Println("opSet", k)
 		st.Set(keyFmt(k), valFmt(k))
 		err := truth.Set(keyFmt(k), valFmt(k))
 		require.NoError(t, err)
 	case opSetRange:
 		start := randInt(maxKey - 2)
 		end := randInt(maxKey-start) + start
+		fmt.Println("opSetRange", start, end)
 		setRange(t, st, truth, start, end)
 	case opDel:
 		k := randInt(maxKey)
+		fmt.Println("opDel", k)
 		st.Delete(keyFmt(k))
 		err := truth.Delete(keyFmt(k))
 		require.NoError(t, err)
 	case opDelRange:
 		start := randInt(maxKey - 2)
 		end := randInt(maxKey-start) + start
+		fmt.Println("opDelRange", start, end)
 		deleteRange(t, st, truth, start, end)
 	case opWrite:
+		fmt.Println("opWrite")
 		st.Write()
 	}
 }

@@ -1,5 +1,9 @@
 package kv
 
+import (
+	"bytes"
+)
+
 // This code was copied from golang.org/pkg/container/list, but specially adapted
 // for use with kv.Pair to avoid the type assertion CPU expense of using Value with
 // an interface, per https://github.com/cosmos/cosmos-sdk/issues/8810
@@ -40,6 +44,28 @@ func (e *Element) Prev() *Element {
 	return nil
 }
 
+// seekPosition walks from e, which sits at index curPos (the element at
+// root.Next() is index 0), to the element at index targetPos and returns it.
+// A nil receiver is returned unchanged. No nil check is made between steps,
+// so callers are expected to keep targetPos inside the list.
+func (e *Element) seekPosition(curPos int, targetPos int) *Element {
+	if e == nil {
+		return e
+	}
+
+	cur := e
+	// Target at or after the current index: step forward. At most one of the
+	// two loops runs, because their conditions are mutually exclusive.
+	for i := curPos; i < targetPos; i++ {
+		cur = cur.Next()
+	}
+	// Target before the current index: step backward.
+	for i := curPos; i > targetPos; i-- {
+		cur = cur.Prev()
+	}
+	return cur
+}
+
 // List represents a doubly linked list.
 // The zero value for List is an empty list ready to use.
 type List struct {
@@ -234,3 +260,49 @@ func (l *List) PushFrontList(other *List) {
 		l.insertValue(e.Value, &l.root)
 	}
 }
+
+// SortedSearch searches for the first element with a key >= start and returns
+// it. It assumes the list is sorted ascending by key under bytes.Compare.
+// A nil or empty start compares <= every key, so the front element is returned.
+// When every key is < start, the result is whatever Next() yields past the last
+// element (expected to be nil; confirm against Element.Next).
+// Probe indices are chosen exactly as in a binary search, so the number of
+// key comparisons is logarithmic in the list length; reaching each probe still
+// requires walking the links forward from the current lower bound.
+func (l *List) SortedSearch(start []byte) *Element {
+	// The loop below mirrors the standard library's sort.Search:
+	// func Search(n int, f func(int) bool) int {
+	// 	// Define f(-1) == false and f(n) == true.
+	// 	// Invariant: f(i-1) == false, f(j) == true.
+	// 	i, j := 0, n
+	// 	for i < j {
+	// 		h := int(uint(i+j) >> 1) // avoid overflow when computing h
+	// 		// i ≤ h < j
+	// 		if !f(h) {
+	// 			i = h + 1 // preserves f(i-1) == false
+	// 		} else {
+	// 			j = h // preserves f(j) == true
+	// 		}
+	// 	}
+	// 	// i == j, f(i-1) == false, and f(j) (= f(i)) == true  =>  answer is i.
+	// 	return i
+	// }
+	//
+	// Here f(index) is bytes.Compare(l[index].Key, start) >= 0, and iElem is
+	// kept pointing at the element whose index is i.
+	i, iElem, j := 0, l.Front(), l.len
+	for i < j {
+		h := int(uint(i+j) >> 1) // avoid overflow when computing h
+		// i ≤ h < j ≤ l.len, so the probe stays inside the list.
+		hElem := iElem.seekPosition(i, h)
+		if bytes.Compare(hElem.Value.Key, start) < 0 {
+			i = h + 1 // preserves f(i-1) == false
+			iElem = hElem.Next()
+		} else {
+			j = h // preserves f(j) == true
+		}
+	}
+	// i == j, f(i-1) == false, and f(j) (= f(i)) == true  =>  answer is i.
+	return iElem
+}