storage/engine: optimize MVCCFindSplitKey

Use an explicit iterator instead of Reader.Iterate() in order to reduce allocations. Specifically, we can avoid 2 allocations per key/value. Do not create a snapshot when calling MVCCFindSplitKey as internally it uses a single iterator which has an implicit snapshot. Remove the unused rangeID parameter to MVCCFindSplitKey. This parameter was only being used to lookup the range stats to log them under V(2). name old time/op new time/op delta MVCCFindSplitKey_RocksDB/valueSize=32-8 341ms ±23% 269ms ± 8% -21.29% (p=0.000 n=10+10) name old speed new speed delta MVCCFindSplitKey_RocksDB/valueSize=32-8 198MB/s ±19% 250MB/s ± 8% +26.34% (p=0.000 n=10+10) name old alloc/op new alloc/op delta MVCCFindSplitKey_RocksDB/valueSize=32-8 36.6MB ± 0% 0.0MB ± 0% -100.00% (p=0.000 n=9+10) name old allocs/op new allocs/op delta MVCCFindSplitKey_RocksDB/valueSize=32-8 1.16M ± 0% 0.00M ± 0% -100.00% (p=0.000 n=9+10) See cockroachdb#18646
petermattis · Sep 20, 2017 · 4da09e7 · 4da09e7
1 parent cc2d642
commit 4da09e7
Show file tree

Hide file tree

Showing 5 changed files with 124 additions and 105 deletions.
diff --git a/pkg/storage/engine/bench_rocksdb_test.go b/pkg/storage/engine/bench_rocksdb_test.go
@@ -80,6 +80,14 @@ func BenchmarkMVCCComputeStats_RocksDB(b *testing.B) {
 	}
 }
 
+func BenchmarkMVCCFindSplitKey_RocksDB(b *testing.B) {
+	for _, valueSize := range []int{32} {
+		b.Run(fmt.Sprintf("valueSize=%d", valueSize), func(b *testing.B) {
+			runMVCCFindSplitKey(setupMVCCRocksDB, valueSize, b)
+		})
+	}
+}
+
 func BenchmarkIterOnBatch_RocksDB(b *testing.B) {
 	for _, writes := range []int{10, 100, 1000, 10000} {
 		b.Run(fmt.Sprintf("writes=%d", writes), func(b *testing.B) {

diff --git a/pkg/storage/engine/bench_test.go b/pkg/storage/engine/bench_test.go
@@ -590,6 +590,28 @@ func runMVCCComputeStats(emk engineMaker, valueBytes int, b *testing.B) {
 	log.Infof(context.Background(), "live_bytes: %d", stats.LiveBytes)
 }
 
+// runMVCCCFindSplitKey benchmarks MVCCFindSplitKey on a 64MB range of data.
+func runMVCCFindSplitKey(emk engineMaker, valueBytes int, b *testing.B) {
+	const rangeBytes = 64 * 1024 * 1024
+	numKeys := rangeBytes / (overhead + valueBytes)
+	eng, _ := setupMVCCData(emk, 1, numKeys, valueBytes, b)
+	defer eng.Close()
+
+	b.SetBytes(rangeBytes)
+	b.ResetTimer()
+
+	var err error
+	for i := 0; i < b.N; i++ {
+		_, err = MVCCFindSplitKey(
+			context.Background(), eng, roachpb.RKeyMin, roachpb.RKeyMax, rangeBytes/2)
+		if err != nil {
+			b.Fatal(err)
+		}
+	}
+
+	b.StopTimer()
+}
+
 func runBatchApplyBatchRepr(
 	emk engineMaker, writeOnly bool, valueSize, batchSize int, b *testing.B,
 ) {

diff --git a/pkg/storage/engine/mvcc.go b/pkg/storage/engine/mvcc.go
@@ -23,7 +23,6 @@ import (
 
 	"golang.org/x/net/context"
 
-	"github.com/dustin/go-humanize"
 	"github.com/gogo/protobuf/proto"
 	"github.com/pkg/errors"
 
@@ -2294,7 +2293,6 @@ func IsValidSplitKey(key roachpb.Key) bool {
 func MVCCFindSplitKey(
 	ctx context.Context,
 	engine Reader,
-	rangeID roachpb.RangeID,
 	key,
 	endKey roachpb.RKey,
 	targetSize int64,
@@ -2306,33 +2304,29 @@ func MVCCFindSplitKey(
 	encStartKey := MakeMVCCMetadataKey(key.AsRawKey())
 	encEndKey := MakeMVCCMetadataKey(endKey.AsRawKey())
 
-	if log.V(2) {
-		log.Infof(ctx, "searching split key for %d [%s, %s)", rangeID, key, endKey)
-	}
-
-	// Get range size from stats.
-	ms, err := MVCCGetRangeStats(ctx, engine, rangeID)
-	if err != nil {
-		return nil, err
-	}
-
-	rangeSize := ms.KeyBytes + ms.ValBytes
-	if log.V(2) {
-		log.Infof(ctx, "range size: %s, targetSize %s",
-			humanize.IBytes(uint64(rangeSize)), humanize.IBytes(uint64(targetSize)))
-	}
-
 	sizeSoFar := int64(0)
 	bestSplitKey := encStartKey
 	bestSplitDiff := int64(math.MaxInt64)
 	var lastKey roachpb.Key
+	var lastKeyBuf []byte
+	var bestSplitKeyBuf []byte
 	var n int
 
-	if err := engine.Iterate(encStartKey, encEndKey, func(kv MVCCKeyValue) (bool, error) {
+	it := engine.NewIterator(false /* prefix */)
+	defer it.Close()
+
+	for it.Seek(encStartKey); ; it.Next() {
+		if ok, err := it.Valid(); err != nil {
+			return nil, err
+		} else if !ok || !it.Less(encEndKey) {
+			break
+		}
+		unsafeKey := it.UnsafeKey()
+
 		n++
 		// Is key within a legal key range? Note that we never choose the first key
 		// as the split key.
-		valid := n > 1 && IsValidSplitKey(kv.Key.Key)
+		valid := n > 1 && IsValidSplitKey(unsafeKey.Key)
 
 		// Determine if this key would make a better split than last "best" key.
 		diff := targetSize - sizeSoFar
@@ -2341,29 +2335,30 @@ func MVCCFindSplitKey(
 		}
 		if valid && diff < bestSplitDiff {
 			if log.V(2) {
-				log.Infof(ctx, "better split: diff %d at %s", diff, kv.Key)
+				log.Infof(ctx, "better split: diff %d at %s", diff, unsafeKey)
 			}
-			bestSplitKey = kv.Key
+			bestSplitKey = unsafeKey
+			bestSplitKey.Key = append(bestSplitKeyBuf, bestSplitKey.Key...)
+			bestSplitKeyBuf = bestSplitKey.Key[:0]
 			bestSplitDiff = diff
 		}
 
 		// Determine whether we've found best key and can exit iteration.
-		done := !bestSplitKey.Key.Equal(encStartKey.Key) && diff > bestSplitDiff
-		if done && log.V(2) {
-			log.Infof(ctx, "target size reached")
+		if !bestSplitKey.Key.Equal(encStartKey.Key) && diff > bestSplitDiff {
+			if log.V(2) {
+				log.Infof(ctx, "target size reached")
+			}
+			break
 		}
 
 		// Add this key/value to the size scanned so far.
-		if kv.Key.IsValue() && bytes.Equal(kv.Key.Key, lastKey) {
-			sizeSoFar += mvccVersionTimestampSize + int64(len(kv.Value))
+		if unsafeKey.IsValue() && bytes.Equal(unsafeKey.Key, lastKey) {
+			sizeSoFar += mvccVersionTimestampSize + int64(len(it.UnsafeValue()))
 		} else {
-			sizeSoFar += int64(kv.Key.EncodedSize() + len(kv.Value))
+			sizeSoFar += int64(unsafeKey.EncodedSize() + len(it.UnsafeValue()))
 		}
-		lastKey = kv.Key.Key
-
-		return done, nil
-	}); err != nil {
-		return nil, err
+		lastKey = append(lastKeyBuf, unsafeKey.Key...)
+		lastKeyBuf = lastKey[:0]
 	}
 
 	if bestSplitKey.Key.Equal(encStartKey.Key) {

diff --git a/pkg/storage/engine/mvcc_test.go b/pkg/storage/engine/mvcc_test.go
@@ -3076,8 +3076,6 @@ func TestFindSplitKey(t *testing.T) {
 	if err := MVCCSetRangeStats(context.Background(), engine, rangeID, ms); err != nil {
 		t.Fatal(err)
 	}
-	snap := engine.NewSnapshot()
-	defer snap.Close()
 
 	testData := []struct {
 		targetSize int64
@@ -3090,7 +3088,7 @@ func TestFindSplitKey(t *testing.T) {
 
 	for i, td := range testData {
 		humanSplitKey, err := MVCCFindSplitKey(
-			context.Background(), snap, rangeID, roachpb.RKeyMin, roachpb.RKeyMax, td.targetSize)
+			context.Background(), engine, roachpb.RKeyMin, roachpb.RKeyMax, td.targetSize)
 		if err != nil {
 			t.Fatal(err)
 		}
@@ -3198,48 +3196,47 @@ func TestFindValidSplitKeys(t *testing.T) {
 	}
 
 	for i, test := range testCases {
-		engine := createTestEngine()
-		defer engine.Close()
+		t.Run("", func(t *testing.T) {
+			engine := createTestEngine()
+			defer engine.Close()
 
-		ms := &enginepb.MVCCStats{}
-		val := roachpb.MakeValueFromString(strings.Repeat("X", 10))
-		for _, k := range test.keys {
-			if err := MVCCPut(context.Background(), engine, ms, []byte(k), hlc.Timestamp{Logical: 1}, val, nil); err != nil {
+			ms := &enginepb.MVCCStats{}
+			val := roachpb.MakeValueFromString(strings.Repeat("X", 10))
+			for _, k := range test.keys {
+				if err := MVCCPut(context.Background(), engine, ms, []byte(k), hlc.Timestamp{Logical: 1}, val, nil); err != nil {
+					t.Fatal(err)
+				}
+			}
+			// write stats
+			if err := MVCCSetRangeStats(context.Background(), engine, rangeID, ms); err != nil {
 				t.Fatal(err)
 			}
-		}
-		// write stats
-		if err := MVCCSetRangeStats(context.Background(), engine, rangeID, ms); err != nil {
-			t.Fatal(err)
-		}
-		snap := engine.NewSnapshot()
-		defer snap.Close()
-		rangeStart := test.keys[0]
-		rangeEnd := test.keys[len(test.keys)-1].Next()
-		rangeStartAddr, err := keys.Addr(rangeStart)
-		if err != nil {
-			t.Fatal(err)
-		}
-		rangeEndAddr, err := keys.Addr(rangeEnd)
-		if err != nil {
-			t.Fatal(err)
-		}
-		targetSize := (ms.KeyBytes + ms.ValBytes) / 2
-		splitKey, err := MVCCFindSplitKey(
-			context.Background(), snap, rangeID, rangeStartAddr, rangeEndAddr, targetSize)
-		if test.expError {
-			if !testutils.IsError(err, "has no valid splits") {
-				t.Errorf("%d: unexpected error: %v", i, err)
+			rangeStart := test.keys[0]
+			rangeEnd := test.keys[len(test.keys)-1].Next()
+			rangeStartAddr, err := keys.Addr(rangeStart)
+			if err != nil {
+				t.Fatal(err)
 			}
-			continue
-		}
-		if err != nil {
-			t.Errorf("%d; unexpected error: %s", i, err)
-			continue
-		}
-		if !splitKey.Equal(test.expSplit) {
-			t.Errorf("%d: expected split key %q; got %q", i, test.expSplit, splitKey)
-		}
+			rangeEndAddr, err := keys.Addr(rangeEnd)
+			if err != nil {
+				t.Fatal(err)
+			}
+			targetSize := (ms.KeyBytes + ms.ValBytes) / 2
+			splitKey, err := MVCCFindSplitKey(
+				context.Background(), engine, rangeStartAddr, rangeEndAddr, targetSize)
+			if test.expError {
+				if !testutils.IsError(err, "has no valid splits") {
+					t.Fatalf("%d: unexpected error: %v", i, err)
+				}
+				return
+			}
+			if err != nil {
+				t.Fatalf("%d; unexpected error: %s", i, err)
+			}
+			if !splitKey.Equal(test.expSplit) {
+				t.Errorf("%d: expected split key %q; got %q", i, test.expSplit, splitKey)
+			}
+		})
 	}
 }
 
@@ -3292,37 +3289,36 @@ func TestFindBalancedSplitKeys(t *testing.T) {
 	}
 
 	for i, test := range testCases {
-		engine := createTestEngine()
-		defer engine.Close()
-
-		ms := &enginepb.MVCCStats{}
-		var expKey roachpb.Key
-		for j, keySize := range test.keySizes {
-			key := roachpb.Key(fmt.Sprintf("%d%s", j, strings.Repeat("X", keySize)))
-			if test.expSplit == j {
-				expKey = key
+		t.Run("", func(t *testing.T) {
+			engine := createTestEngine()
+			defer engine.Close()
+
+			ms := &enginepb.MVCCStats{}
+			var expKey roachpb.Key
+			for j, keySize := range test.keySizes {
+				key := roachpb.Key(fmt.Sprintf("%d%s", j, strings.Repeat("X", keySize)))
+				if test.expSplit == j {
+					expKey = key
+				}
+				val := roachpb.MakeValueFromString(strings.Repeat("X", test.valSizes[j]))
+				if err := MVCCPut(context.Background(), engine, ms, key, hlc.Timestamp{Logical: 1}, val, nil); err != nil {
+					t.Fatal(err)
+				}
 			}
-			val := roachpb.MakeValueFromString(strings.Repeat("X", test.valSizes[j]))
-			if err := MVCCPut(context.Background(), engine, ms, key, hlc.Timestamp{Logical: 1}, val, nil); err != nil {
+			// write stats
+			if err := MVCCSetRangeStats(context.Background(), engine, rangeID, ms); err != nil {
 				t.Fatal(err)
 			}
-		}
-		// write stats
-		if err := MVCCSetRangeStats(context.Background(), engine, rangeID, ms); err != nil {
-			t.Fatal(err)
-		}
-		snap := engine.NewSnapshot()
-		defer snap.Close()
-		targetSize := (ms.KeyBytes + ms.ValBytes) / 2
-		splitKey, err := MVCCFindSplitKey(
-			context.Background(), snap, rangeID, roachpb.RKey("\x02"), roachpb.RKeyMax, targetSize)
-		if err != nil {
-			t.Errorf("unexpected error: %s", err)
-			continue
-		}
-		if !splitKey.Equal(expKey) {
-			t.Errorf("%d: expected split key %q; got %q", i, expKey, splitKey)
-		}
+			targetSize := (ms.KeyBytes + ms.ValBytes) / 2
+			splitKey, err := MVCCFindSplitKey(
+				context.Background(), engine, roachpb.RKey("\x02"), roachpb.RKeyMax, targetSize)
+			if err != nil {
+				t.Fatalf("unexpected error: %s", err)
+			}
+			if !splitKey.Equal(expKey) {
+				t.Errorf("%d: expected split key %q; got %q", i, expKey, splitKey)
+			}
+		})
 	}
 }
 

diff --git a/pkg/storage/replica_command.go b/pkg/storage/replica_command.go
@@ -2660,12 +2660,10 @@ func (r *Replica) adminSplitWithDescriptor(
 		var foundSplitKey roachpb.Key
 		if len(args.SplitKey) == 0 {
 			// Find a key to split by size.
-			snap := r.store.engine.NewSnapshot()
-			defer snap.Close()
 			var err error
 			targetSize := r.GetMaxBytes() / 2
 			foundSplitKey, err = engine.MVCCFindSplitKey(
-				ctx, snap, desc.RangeID, desc.StartKey, desc.EndKey, targetSize)
+				ctx, r.store.engine, desc.StartKey, desc.EndKey, targetSize)
 			if err != nil {
 				return reply, false, roachpb.NewErrorf("unable to determine split key: %s", err)
 			}