From 314e1d52c248e61847e0d78be165fc9d843ef812 Mon Sep 17 00:00:00 2001
From: Dev Ojha
Date: Sun, 12 Dec 2021 17:12:37 -0600
Subject: [PATCH] perf: Speedup cachekv iterator on large deletions & IBC v2
 upgrade logic (#10741)

---
 CHANGELOG.md                      |  1 +
 store/cachekv/memiterator.go      | 20 +++++++-----
 store/cachekv/store_bench_test.go | 51 +++++++++++++++++++++++++++++++
 3 files changed, 65 insertions(+), 7 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9bf2e26dac68..fd7058d72e04 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -149,6 +149,7 @@ Ref: https://keepachangelog.com/en/1.0.0/
 * [\#10468](https://github.com/cosmos/cosmos-sdk/pull/10468) Allow futureOps to queue additional operations in simulations
 * [\#10625](https://github.com/cosmos/cosmos-sdk/pull/10625) Add `--fee-payer` CLI flag
 * (cli) [\#10683](https://github.com/cosmos/cosmos-sdk/pull/10683) In CLI, allow 1 SIGN_MODE_DIRECT signer in transactions with multiple signers.
+* (store) [\#10741](https://github.com/cosmos/cosmos-sdk/pull/10741) Significantly speed up iterator creation after delete-heavy workloads. Significantly improves IBC migration times.
 
 ### Bug Fixes
 
diff --git a/store/cachekv/memiterator.go b/store/cachekv/memiterator.go
index 0a4bc57a6406..04df40ff56aa 100644
--- a/store/cachekv/memiterator.go
+++ b/store/cachekv/memiterator.go
@@ -1,6 +1,8 @@
 package cachekv
 
 import (
+	"bytes"
+
 	dbm "github.com/tendermint/tm-db"
 
 	"github.com/cosmos/cosmos-sdk/store/types"
@@ -12,6 +14,7 @@ import (
 type memIterator struct {
 	types.Iterator
 
+	lastKey []byte
 	deleted map[string]struct{}
 }
 
@@ -29,22 +32,25 @@ func newMemIterator(start, end []byte, items *dbm.MemDB, deleted map[string]stru
 		panic(err)
 	}
 
-	newDeleted := make(map[string]struct{})
-	for k, v := range deleted {
-		newDeleted[k] = v
-	}
-
 	return &memIterator{
 		Iterator: iter,
 
-		deleted: newDeleted,
+		lastKey: nil,
+		deleted: deleted,
 	}
 }
 
 func (mi *memIterator) Value() []byte {
 	key := mi.Iterator.Key()
-	if _, ok := mi.deleted[string(key)]; ok {
+	// We need to handle the case where deleted is modified and includes our current key
+	// We handle this by maintaining a lastKey object in the iterator.
+	// If the current key is the same as the last key (and last key is not nil / the start)
+	// then we are calling value on the same thing as last time.
+	// Therefore we don't check the mi.deleted to see if this key is included in there.
+	reCallingOnOldLastKey := (mi.lastKey != nil) && bytes.Equal(key, mi.lastKey)
+	if _, ok := mi.deleted[string(key)]; ok && !reCallingOnOldLastKey {
 		return nil
 	}
+	mi.lastKey = key
 	return mi.Iterator.Value()
 }
diff --git a/store/cachekv/store_bench_test.go b/store/cachekv/store_bench_test.go
index 040c4a77c97c..88c86eff564a 100644
--- a/store/cachekv/store_bench_test.go
+++ b/store/cachekv/store_bench_test.go
@@ -78,6 +78,53 @@ func benchmarkRandomSet(b *testing.B, keysize int) {
 	for _, k := range keys {
 		kvstore.Set(k, value)
 	}
+
+	iter := kvstore.Iterator(keys[0], keys[b.N])
+	defer iter.Close()
+
+	for _ = iter.Key(); iter.Valid(); iter.Next() {
+		// deadcode elimination stub
+		sink = iter
+	}
+}
+
+// Benchmark creating an iterator on a parent with D entries,
+// that are all deleted in the cacheKV store.
+// We essentially are benchmarking the cacheKV iterator creation & iteration times
+// with the number of entries deleted in the parent.
+func benchmarkIteratorOnParentWithManyDeletes(b *testing.B, numDeletes int) { + mem := dbadapter.Store{DB: dbm.NewMemDB()} + + // Use a singleton for value, to not waste time computing it + value := randSlice(32) + // Use simple values for keys, pick a random start, + // and take next D keys sequentially after. + startKey := randSlice(32) + keys := generateSequentialKeys(startKey, numDeletes) + // setup parent db with D keys. + for _, k := range keys { + mem.Set(k, value) + } + kvstore := cachekv.NewStore(mem) + // Delete all keys from the cache KV store. + // The keys[1:] is to keep at least one entry in parent, due to a bug in the SDK iterator design. + // Essentially the iterator will never be valid, in that it should never run. + // However, this is incompatible with the for loop structure the SDK uses, hence + // causes a panic. Thus we do keys[1:]. + for _, k := range keys[1:] { + kvstore.Delete(k) + } + + b.ReportAllocs() + b.ResetTimer() + + iter := kvstore.Iterator(keys[0], keys[b.N]) + defer iter.Close() + + for _ = iter.Key(); iter.Valid(); iter.Next() { + // deadcode elimination stub + sink = iter + } } func BenchmarkBlankParentIteratorNextKeySize32(b *testing.B) { @@ -91,3 +138,7 @@ func BenchmarkBlankParentAppendKeySize32(b *testing.B) { func BenchmarkSetKeySize32(b *testing.B) { benchmarkRandomSet(b, 32) } + +func BenchmarkIteratorOnParentWith1MDeletes(b *testing.B) { + benchmarkIteratorOnParentWithManyDeletes(b, 1_000_000) +}
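
Why the change is fast: before this patch, newMemIterator deep-copied the store's
deleted map on every iterator creation (the removed newDeleted loop above), so
opening an iterator after D deletions cost O(D) time and allocations. After the
patch the iterator shares the store's map, so creation does constant work
regardless of how many keys have been deleted. That is presumably what the IBC
note in the changelog entry refers to: the migration deletes a large number of
keys through this store and then opens fresh iterators, so each iterator used
to pay for all prior deletions. A minimal standalone sketch of the two
behaviours in plain Go; copyTombstones and shareTombstones are illustrative
names, not SDK functions:

    package main

    import "fmt"

    // copyTombstones mirrors the pre-patch behaviour: every iterator gets its
    // own copy of the deleted-keys set, costing O(D) time and memory.
    func copyTombstones(deleted map[string]struct{}) map[string]struct{} {
            cp := make(map[string]struct{}, len(deleted))
            for k := range deleted {
                    cp[k] = struct{}{}
            }
            return cp
    }

    // shareTombstones mirrors the post-patch behaviour: the set is shared, so
    // iterator creation does constant work regardless of D.
    func shareTombstones(deleted map[string]struct{}) map[string]struct{} {
            return deleted
    }

    func main() {
            deleted := make(map[string]struct{}, 1_000_000)
            for i := 0; i < 1_000_000; i++ {
                    deleted[fmt.Sprintf("key-%07d", i)] = struct{}{}
            }
            _ = copyTombstones(deleted)  // roughly a million map inserts per iterator, pre-patch
            _ = shareTombstones(deleted) // a single reference copy, post-patch
    }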
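
Why the lastKey field is needed: once the deleted map is shared, a caller that
deletes the key it is currently positioned on mutates the very map the live
iterator consults, and a second Value() call on that key would otherwise start
returning nil. The memIterator therefore remembers the last key it returned a
value for and skips the tombstone check when Value() is called again for that
same key, keeping the old copy-based behaviour for the key under the cursor
(for keys further ahead, a mid-iteration delete is now visible to the iterator,
which the old copy hid). The sketch below shows the interleaving the guard
protects; it assumes the key under the cursor lives in the cachekv dirty cache,
which holds here because the key is written through the cache, and it uses the
same packages the new benchmark imports:

    package main

    import (
            dbm "github.com/tendermint/tm-db"

            "github.com/cosmos/cosmos-sdk/store/cachekv"
            "github.com/cosmos/cosmos-sdk/store/dbadapter"
    )

    func main() {
            mem := dbadapter.Store{DB: dbm.NewMemDB()}
            kvstore := cachekv.NewStore(mem)

            // Written through the cache, so the key sits in the dirty cache
            // that the memIterator walks.
            kvstore.Set([]byte("k1"), []byte("v1"))

            iter := kvstore.Iterator(nil, nil)
            defer iter.Close()

            for ; iter.Valid(); iter.Next() {
                    key := iter.Key()
                    _ = iter.Value()    // records key as lastKey inside the memIterator
                    kvstore.Delete(key) // adds key to the shared deleted map mid-iteration
                    _ = iter.Value()    // still returns "v1" thanks to the lastKey guard
            }
    }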
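
Running the new benchmark: BenchmarkIteratorOnParentWith1MDeletes populates the
parent store with one million sequential keys, deletes all but the first of them
through the cachekv layer, and then times opening a cachekv iterator over that
range and walking it, which is exactly the path changed in memiterator.go.
Assuming the repository's standard Go module layout, it can be run on its own
with something like

    go test -run '^$' -bench BenchmarkIteratorOnParentWith1MDeletes -benchmem ./store/cachekv/

and compared before and after this commit to reproduce the reported speedup.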