From 1dec2f8510a9e275647f0796ad231a88c850d854 Mon Sep 17 00:00:00 2001 From: wenyihu6 Date: Mon, 17 Jul 2023 22:59:01 -0400 Subject: [PATCH] asim: add randomized range generation --- pkg/kv/kvserver/asim/gen/generator.go | 27 ++++- pkg/kv/kvserver/asim/state/new_state.go | 101 ++++++++++++++++++ pkg/kv/kvserver/asim/tests/BUILD.bazel | 3 +- .../kvserver/asim/tests/default_settings.go | 21 ++++ pkg/kv/kvserver/asim/tests/rand_framework.go | 94 +++++++++++++++- pkg/kv/kvserver/asim/tests/rand_gen.go | 70 ++++++++++++ pkg/kv/kvserver/asim/tests/rand_test.go | 6 +- pkg/kv/kvserver/asim/workload/workload.go | 9 ++ 8 files changed, 318 insertions(+), 13 deletions(-) diff --git a/pkg/kv/kvserver/asim/gen/generator.go b/pkg/kv/kvserver/asim/gen/generator.go index d5fc7fdd851f..3672f57ed533 100644 --- a/pkg/kv/kvserver/asim/gen/generator.go +++ b/pkg/kv/kvserver/asim/gen/generator.go @@ -209,8 +209,16 @@ type PlacementType int const ( Uniform PlacementType = iota Skewed + Random + WeightedRandom ) +func GetAvailablePlacementTypes() []PlacementType { + // WeightedRandom is enabled if and only if default setting contains a + // non-zero array for weighted rand. + return []PlacementType{Uniform, Skewed, Random} +} + // BaseRanges provides basic ranges functionality and are embedded in // other specialized range structs. type BaseRanges struct { @@ -220,21 +228,27 @@ type BaseRanges struct { Bytes int64 } -// getRangesInfo generates RangesInfo, with its distribution defined by +// GetRangesInfo generates RangesInfo, with its distribution defined by // PlacementType and other configurations determined by BaseRanges fields. -func (b BaseRanges) getRangesInfo(pType PlacementType, numOfStores int) state.RangesInfo { +func (b BaseRanges) GetRangesInfo( + pType PlacementType, numOfStores int, randSource *rand.Rand, weightedRandom []float64, +) state.RangesInfo { switch pType { case Uniform: return state.RangesInfoEvenDistribution(numOfStores, b.Ranges, b.KeySpace, b.ReplicationFactor, b.Bytes) case Skewed: return state.RangesInfoSkewedDistribution(numOfStores, b.Ranges, b.KeySpace, b.ReplicationFactor, b.Bytes) + case WeightedRandom: + return state.RangesInfoWeightedRandDistribution(randSource, weightedRandom, b.Ranges, b.KeySpace, b.ReplicationFactor, b.Bytes) + case Random: + return state.RangesInfoRandDistribution(randSource, numOfStores, b.Ranges, b.KeySpace, b.ReplicationFactor, b.Bytes) default: panic(fmt.Sprintf("unexpected range placement type %v", pType)) } } // LoadRangeInfo loads the given state with the specified rangesInfo. -func (b BaseRanges) loadRangeInfo(s state.State, rangesInfo state.RangesInfo) { +func (b BaseRanges) LoadRangeInfo(s state.State, rangesInfo state.RangesInfo) { for _, rangeInfo := range rangesInfo { rangeInfo.Size = b.Bytes } @@ -250,6 +264,9 @@ type BasicRanges struct { func NewBasicRanges( ranges int, placementType PlacementType, keySpace int, replicationFactor int, bytes int64, ) BasicRanges { + if placementType == WeightedRandom || placementType == Random { + panic(fmt.Sprintf("basic ranges cannot use randomized type %v", placementType)) + } return BasicRanges{ BaseRanges: BaseRanges{ Ranges: ranges, @@ -266,8 +283,8 @@ func NewBasicRanges( func (br BasicRanges) Generate( seed int64, settings *config.SimulationSettings, s state.State, ) state.State { - rangesInfo := br.getRangesInfo(br.PlacementType, len(s.Stores())) - br.loadRangeInfo(s, rangesInfo) + rangesInfo := br.GetRangesInfo(br.PlacementType, len(s.Stores()), nil, []float64{}) + br.LoadRangeInfo(s, rangesInfo) return s } diff --git a/pkg/kv/kvserver/asim/state/new_state.go b/pkg/kv/kvserver/asim/state/new_state.go index cee7959759b1..3e14d6d555de 100644 --- a/pkg/kv/kvserver/asim/state/new_state.go +++ b/pkg/kv/kvserver/asim/state/new_state.go @@ -12,6 +12,7 @@ package state import ( "fmt" + "math/rand" "sort" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/asim/config" @@ -62,6 +63,53 @@ func exactDistribution(counts []int) []float64 { return distribution } +type Weighted struct { + cumulativeWeights []float64 +} + +func NewWeighted(weightedStores []float64) Weighted { + cumulativeWeights := make([]float64, len(weightedStores)) + prefixSumWeight := float64(0) + for i, item := range weightedStores { + prefixSumWeight += item + cumulativeWeights[i] = prefixSumWeight + } + return Weighted{cumulativeWeights: cumulativeWeights} +} + +func (w Weighted) Rand(randSource *rand.Rand) int { + r := randSource.Float64() + index := sort.Search(len(w.cumulativeWeights), func(i int) bool { return w.cumulativeWeights[i] >= r }) + return index +} + +func weightedRandDistribution(randSource *rand.Rand, weightedStores []float64) []float64 { + w := NewWeighted(weightedStores) + // Vote for 10 times and give a distribution. + numSamples := 10 + votes := make([]int, len(weightedStores)) + for i := 0; i < numSamples; i++ { + index := w.Rand(randSource) + votes[index] += 1 + } + return exactDistribution(votes) +} + +func randDistribution(randSource *rand.Rand, n int) []float64 { + total := float64(0) + distribution := make([]float64, n) + for i := 0; i < n; i++ { + num := float64(randSource.Intn(10)) + distribution[i] = num + total += num + } + + for i := 0; i < n; i++ { + distribution[i] = distribution[i] / total + } + return distribution +} + // RangesInfoWithDistribution returns a RangesInfo, where the stores given are // initialized with the specified % of the replicas. This is done on a best // effort basis, given the replication factor. It may be impossible to satisfy @@ -247,6 +295,59 @@ func RangesInfoEvenDistribution( int64(MinKey), int64(keyspace), rangeSize) } +// Weighted distribution: vote for 10 times and see which bucket the number falls under +func RangesInfoWeightedRandDistribution( + randSource *rand.Rand, + weightedStores []float64, + ranges int, + keyspace int, + replicationFactor int, + rangeSize int64, +) RangesInfo { + if randSource == nil || len(weightedStores) == 0 { + panic("unexpected arguments for weighted random ranges info") + } + distribution := weightedRandDistribution(randSource, weightedStores) + storeList := makeStoreList(len(weightedStores)) + spanConfig := defaultSpanConfig + spanConfig.NumReplicas = int32(replicationFactor) + spanConfig.NumVoters = int32(replicationFactor) + return RangesInfoWithDistribution( + storeList, + distribution, + distribution, + ranges, + spanConfig, + int64(MinKey), + int64(keyspace), + rangeSize, /* rangeSize */ + ) +} + +func RangesInfoRandDistribution( + randSource *rand.Rand, + stores int, + ranges int, + keyspace int, + replicationFactor int, + rangeSize int64, +) RangesInfo { + if randSource == nil { + // BETTER NAME HERE + panic("unexpected arguments for weighted random ranges info") + } + distribution := randDistribution(randSource, stores) + storeList := makeStoreList(stores) + + spanConfig := defaultSpanConfig + spanConfig.NumReplicas = int32(replicationFactor) + spanConfig.NumVoters = int32(replicationFactor) + + return RangesInfoWithDistribution( + storeList, distribution, distribution, ranges, spanConfig, + int64(MinKey), int64(keyspace), rangeSize) +} + // NewStateWithDistribution returns a State where the stores given are // initialized with the specified % of the replicas. This is done on a best // effort basis, given the replication factor. It may be impossible to satisfy diff --git a/pkg/kv/kvserver/asim/tests/BUILD.bazel b/pkg/kv/kvserver/asim/tests/BUILD.bazel index 2b0faf6098dd..a5eb23610f47 100644 --- a/pkg/kv/kvserver/asim/tests/BUILD.bazel +++ b/pkg/kv/kvserver/asim/tests/BUILD.bazel @@ -33,9 +33,9 @@ go_library( name = "tests", srcs = [ "assert.go", - "default_settings.go", "rand_framework.go", "rand_gen.go", + "settings.go", ], importpath = "github.com/cockroachdb/cockroach/pkg/kv/kvserver/asim/tests", visibility = ["//visibility:public"], @@ -46,6 +46,7 @@ go_library( "//pkg/kv/kvserver/asim/gen", "//pkg/kv/kvserver/asim/metrics", "//pkg/kv/kvserver/asim/state", + "//pkg/kv/kvserver/asim/workload", "//pkg/roachpb", "//pkg/spanconfig/spanconfigtestutils", "//pkg/util/log", diff --git a/pkg/kv/kvserver/asim/tests/default_settings.go b/pkg/kv/kvserver/asim/tests/default_settings.go index 0a1ee01aac9e..c62a05e4ebe4 100644 --- a/pkg/kv/kvserver/asim/tests/default_settings.go +++ b/pkg/kv/kvserver/asim/tests/default_settings.go @@ -100,3 +100,24 @@ func defaultPlotSettings() plotSettings { width: defaultWidth, } } + +type rangeGenSettings struct { + rangeKeyGenType generatorType + keySpaceGenType generatorType + weightedRand []float64 +} + +const ( + defaultRangeKeyGenType = uniformGenerator + defaultKeySpaceGenType = uniformGenerator +) + +var defaultWeightedRand []float64 + +func defaultRangeGenSettings() rangeGenSettings { + return rangeGenSettings{ + rangeKeyGenType: defaultRangeKeyGenType, + keySpaceGenType: defaultKeySpaceGenType, + weightedRand: defaultWeightedRand, + } +} diff --git a/pkg/kv/kvserver/asim/tests/rand_framework.go b/pkg/kv/kvserver/asim/tests/rand_framework.go index d5e803c4aa28..56cc806e74b6 100644 --- a/pkg/kv/kvserver/asim/tests/rand_framework.go +++ b/pkg/kv/kvserver/asim/tests/rand_framework.go @@ -13,6 +13,7 @@ package tests import ( "context" "fmt" + "math" "math/rand" "strings" "testing" @@ -21,6 +22,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/kv/kvserver/asim" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/asim/gen" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/asim/metrics" + "github.com/cockroachdb/cockroach/pkg/kv/kvserver/asim/workload" "github.com/guptarohit/asciigraph" ) @@ -30,10 +32,16 @@ type testSettings struct { verbose bool randSource *rand.Rand randOptions map[string]bool + rangeGen rangeGenSettings } func newTestSettings( - numIterations int, duration time.Duration, verbose bool, seed int64, randOptions map[string]bool, + numIterations int, + duration time.Duration, + verbose bool, + seed int64, + randOptions map[string]bool, + rangeGenSettings rangeGenSettings, ) testSettings { return testSettings{ numIterations: numIterations, @@ -41,16 +49,23 @@ func newTestSettings( verbose: verbose, randSource: rand.New(rand.NewSource(seed)), randOptions: randOptions, + rangeGen: rangeGenSettings, } } type randTestingFramework struct { - s testSettings + s testSettings + rangeGenerator generator + keySpaceGenerator generator } func newRandTestingFramework(settings testSettings) randTestingFramework { + rangeGenerator := newRandomizedGenerator(settings.randSource, defaultMinRange, defaultMaxRange, settings.rangeGen.rangeKeyGenType) + keySpaceGenerator := newRandomizedGenerator(settings.randSource, defaultMinKeySpace, defaultMaxKeySpace, settings.rangeGen.keySpaceGenType) return randTestingFramework{ - s: settings, + s: settings, + rangeGenerator: rangeGenerator, + keySpaceGenerator: keySpaceGenerator, } } @@ -65,7 +80,7 @@ func (f randTestingFramework) getRanges() gen.RangeGen { if !f.s.randOptions["ranges"] { return defaultBasicRangesGen() } - return gen.BasicRanges{} + return f.randomBasicRangesGen() } func (f randTestingFramework) getLoad() gen.LoadGen { @@ -183,3 +198,74 @@ func checkAssertions( } return false, "" } + +type generator interface { + Num() int64 +} + +type generatorType int + +const ( + uniformGenerator generatorType = iota + zipfGenerator +) + +func newGenerator(randSource *rand.Rand, iMin int64, iMax int64, gType generatorType) generator { + switch gType { + case uniformGenerator: + return workload.NewUniformKeyGen(iMin, iMax, randSource) + case zipfGenerator: + return workload.NewZipfianKeyGen(iMin, iMax, 1.1, 1, randSource) + default: + panic(fmt.Sprintf("unexpected generator type %v", gType)) + } +} + +func newRandomizedGenerator( + randSource *rand.Rand, iMin int64, iMax int64, gType generatorType, +) generator { + return newGenerator(randSource, iMin, iMax, gType) +} + +const ( + defaultMinRange = 1 + defaultMaxRange = 1000 + defaultMinKeySpace = 1000 + defaultMaxKeySpace = 200000 +) + +func convertInt64ToInt(num int64) int { + // Should be impossible since we have set imax, imin to something smaller to imax32 + if num < math.MinInt32 { + return math.MinInt32 + } + if num > math.MaxUint32 { + return math.MaxUint32 + } + return int(num) +} + +func (f randTestingFramework) randomBasicRangesGen() gen.RangeGen { + options := gen.GetAvailablePlacementTypes() + randIndex := f.s.randSource.Intn(len(options)) + chosenType := options[randIndex] + if len(f.s.rangeGen.weightedRand) == 0 { + return NewRandomizedBasicRanges( + f.s.randSource, + convertInt64ToInt(f.rangeGenerator.Num()), + convertInt64ToInt(f.keySpaceGenerator.Num()), + chosenType, + defaultReplicationFactor, + defaultBytes, + ) + } else { + return NewWeightedRandomizedBasicRanges( + f.s.randSource, + f.s.rangeGen.weightedRand, + convertInt64ToInt(f.rangeGenerator.Num()), + convertInt64ToInt(f.keySpaceGenerator.Num()), + defaultReplicationFactor, + defaultBytes, + ) + } +} diff --git a/pkg/kv/kvserver/asim/tests/rand_gen.go b/pkg/kv/kvserver/asim/tests/rand_gen.go index e69be4167bd2..9166c9bf4491 100644 --- a/pkg/kv/kvserver/asim/tests/rand_gen.go +++ b/pkg/kv/kvserver/asim/tests/rand_gen.go @@ -13,6 +13,7 @@ package tests import ( "math/rand" + "github.com/cockroachdb/cockroach/pkg/kv/kvserver/asim/config" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/asim/gen" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/asim/state" ) @@ -23,3 +24,72 @@ func (f randTestingFramework) randomClusterInfoGen(randSource *rand.Rand) gen.Lo chosenType := state.ClusterOptions[chosenIndex] return loadClusterInfo(chosenType) } + +type RandomizedBasicRanges struct { + gen.BaseRanges + placementType gen.PlacementType + randSource *rand.Rand +} + +func NewRandomizedBasicRanges( + randSource *rand.Rand, + ranges int, + keySpace int, + placementType gen.PlacementType, + replicationFactor int, + bytes int64, +) RandomizedBasicRanges { + if placementType == gen.WeightedRandom { + // BETTER WARNING + panic("cannot use randomized basic ranges") + } + return RandomizedBasicRanges{ + BaseRanges: gen.BaseRanges{ + Ranges: ranges, + KeySpace: keySpace, + ReplicationFactor: replicationFactor, + Bytes: bytes, + }, + placementType: placementType, + randSource: randSource, + } +} + +var _ gen.RangeGen = &RandomizedBasicRanges{} + +func (r RandomizedBasicRanges) Generate( + seed int64, settings *config.SimulationSettings, s state.State, +) state.State { + rangesInfo := r.GetRangesInfo(r.placementType, len(s.Stores()), r.randSource, []float64{}) + r.LoadRangeInfo(s, rangesInfo) + return s +} + +type WeightedRandomizedBasicRanges struct { + RandomizedBasicRanges + weightedRand []float64 +} + +var _ gen.RangeGen = &WeightedRandomizedBasicRanges{} + +func NewWeightedRandomizedBasicRanges( + randSource *rand.Rand, + weightedRand []float64, + ranges int, + keySpace int, + replicationFactor int, + bytes int64, +) WeightedRandomizedBasicRanges { + return WeightedRandomizedBasicRanges{ + RandomizedBasicRanges: NewRandomizedBasicRanges(randSource, ranges, keySpace, gen.WeightedRandom, replicationFactor, bytes), + weightedRand: weightedRand, + } +} + +func (wr WeightedRandomizedBasicRanges) Generate( + seed int64, settings *config.SimulationSettings, s state.State, +) state.State { + rangesInfo := wr.GetRangesInfo(wr.placementType, len(s.Stores()), wr.randSource, wr.weightedRand) + wr.LoadRangeInfo(s, rangesInfo) + return s +} diff --git a/pkg/kv/kvserver/asim/tests/rand_test.go b/pkg/kv/kvserver/asim/tests/rand_test.go index 02b755593342..b7fefd15cac8 100644 --- a/pkg/kv/kvserver/asim/tests/rand_test.go +++ b/pkg/kv/kvserver/asim/tests/rand_test.go @@ -16,14 +16,14 @@ import ( ) const ( - defaultNumIterations = 5 + defaultNumIterations = 1 defaultSeed = 42 defaultDuration = 30 * time.Minute defaultVerbosity = false ) func defaultSettings(randOptions map[string]bool) testSettings { - return newTestSettings(defaultNumIterations, defaultDuration, defaultVerbosity, defaultSeed, randOptions) + return newTestSettings(defaultNumIterations, defaultDuration, defaultVerbosity, defaultSeed, randOptions, defaultRangeGenSettings()) } // TestRandomized is a randomized testing framework which validates an allocator @@ -43,7 +43,7 @@ func defaultSettings(randOptions map[string]bool) testSettings { func TestRandomized(t *testing.T) { randOptions := map[string]bool{ "cluster": true, - "ranges": false, + "ranges": true, "load": false, "static_settings": false, "static_events": false, diff --git a/pkg/kv/kvserver/asim/workload/workload.go b/pkg/kv/kvserver/asim/workload/workload.go index 831878ef83ec..c731d53cf99d 100644 --- a/pkg/kv/kvserver/asim/workload/workload.go +++ b/pkg/kv/kvserver/asim/workload/workload.go @@ -164,6 +164,7 @@ func (rwg *RandomGenerator) Tick(maxTime time.Time) LoadBatch { // KeyGenerator generates read and write keys. type KeyGenerator interface { + Num() int64 writeKey() int64 readKey() int64 rand() *rand.Rand @@ -190,6 +191,10 @@ func NewUniformKeyGen(min, max int64, rand *rand.Rand) KeyGenerator { } } +func (g *uniformGenerator) Num() int64 { + return g.random.Int63n(g.max-g.min) + g.min +} + func (g *uniformGenerator) writeKey() int64 { return g.random.Int63n(g.max-g.min) + g.min } @@ -229,6 +234,10 @@ func NewZipfianKeyGen(min, max int64, s float64, v float64, random *rand.Rand) K } } +func (g *zipfianGenerator) Num() int64 { + return int64(g.zipf.Uint64()) + g.min +} + func (g *zipfianGenerator) writeKey() int64 { return int64(g.zipf.Uint64()) + g.min }