From 28a99fcc248172032fb14fb592aceef9eb8e5ff1 Mon Sep 17 00:00:00 2001 From: wenyihu6 Date: Thu, 31 Aug 2023 10:24:06 -0400 Subject: [PATCH] asim: add zone config satisfiability check Now that we have added the option to generate random span configurations in #110967, we want to have a way to check whether these configurations are satisfiable under the cluster setup. This patch adds the validation check. Please note that the validation process can be expensive with a time complexity of O(max(node count in the cluster, number of replica constraints, number of voter constraints)). To perform this validation and see which span config could lead to failure, please use the following command: ``` "eval" [verbose=validate] ``` See also: #110967 Part of: #106192 Release Note: none Epic: none --- pkg/BUILD.bazel | 3 + .../scheduled/scheduled_event_executor.go | 7 + pkg/kv/kvserver/asim/tests/BUILD.bazel | 1 + pkg/kv/kvserver/asim/tests/output.go | 11 +- pkg/kv/kvserver/asim/tests/rand_gen.go | 3 +- pkg/kv/kvserver/asim/tests/rand_test.go | 3 +- .../asim/tests/testdata/rand/default_settings | 6 + .../asim/tests/testdata/rand/rand_cluster | 6 + .../asim/tests/testdata/rand/rand_event | 381 +++++++++++++++-- .../asim/tests/testdata/rand/rand_ranges | 2 + pkg/kv/kvserver/asim/validator/BUILD.bazel | 30 ++ .../asim/validator/config_validator.go | 385 ++++++++++++++++++ pkg/kv/kvserver/asim/validator/validator.go | 49 +++ .../kvserver/asim/validator/validator_test.go | 174 ++++++++ 14 files changed, 1013 insertions(+), 48 deletions(-) create mode 100644 pkg/kv/kvserver/asim/validator/BUILD.bazel create mode 100644 pkg/kv/kvserver/asim/validator/config_validator.go create mode 100644 pkg/kv/kvserver/asim/validator/validator.go create mode 100644 pkg/kv/kvserver/asim/validator/validator_test.go diff --git a/pkg/BUILD.bazel b/pkg/BUILD.bazel index a7b490394804..826bbeeceae9 100644 --- a/pkg/BUILD.bazel +++ b/pkg/BUILD.bazel @@ -216,6 +216,7 @@ ALL_TESTS = [ 
"//pkg/kv/kvserver/asim/state:state_test", "//pkg/kv/kvserver/asim/storerebalancer:storerebalancer_test", "//pkg/kv/kvserver/asim/tests:tests_test", + "//pkg/kv/kvserver/asim/validator:validator_test", "//pkg/kv/kvserver/asim/workload:workload_test", "//pkg/kv/kvserver/asim:asim_test", "//pkg/kv/kvserver/batcheval/result:result_test", @@ -1332,6 +1333,8 @@ GO_TARGETS = [ "//pkg/kv/kvserver/asim/storerebalancer:storerebalancer_test", "//pkg/kv/kvserver/asim/tests:tests", "//pkg/kv/kvserver/asim/tests:tests_test", + "//pkg/kv/kvserver/asim/validator:validator", + "//pkg/kv/kvserver/asim/validator:validator_test", "//pkg/kv/kvserver/asim/workload:workload", "//pkg/kv/kvserver/asim/workload:workload_test", "//pkg/kv/kvserver/asim:asim", diff --git a/pkg/kv/kvserver/asim/scheduled/scheduled_event_executor.go b/pkg/kv/kvserver/asim/scheduled/scheduled_event_executor.go index 943e3b82c1ab..f42f678300bb 100644 --- a/pkg/kv/kvserver/asim/scheduled/scheduled_event_executor.go +++ b/pkg/kv/kvserver/asim/scheduled/scheduled_event_executor.go @@ -41,6 +41,8 @@ type EventExecutor interface { // events including details of mutation events, assertion checks, and assertion // results. PrintEventsExecuted() string + // ScheduledEvents returns the list of scheduled events. + ScheduledEvents() ScheduledEventList } // eventExecutor is the private implementation of the EventExecutor interface, @@ -71,6 +73,11 @@ func newExecutorWithNoEvents() *eventExecutor { } } +// ScheduledEvents returns the list of scheduled events. +func (e *eventExecutor) ScheduledEvents() ScheduledEventList { + return e.scheduledEvents +} + // PrintEventSummary returns a string summarizing the executed mutation and // assertion events. 
func (e *eventExecutor) PrintEventSummary() string { diff --git a/pkg/kv/kvserver/asim/tests/BUILD.bazel b/pkg/kv/kvserver/asim/tests/BUILD.bazel index 4e42dbdb6b44..eb40238df085 100644 --- a/pkg/kv/kvserver/asim/tests/BUILD.bazel +++ b/pkg/kv/kvserver/asim/tests/BUILD.bazel @@ -20,6 +20,7 @@ go_library( "//pkg/kv/kvserver/asim/history", "//pkg/kv/kvserver/asim/scheduled", "//pkg/kv/kvserver/asim/state", + "//pkg/kv/kvserver/asim/validator", "//pkg/roachpb", "//pkg/sql", "//pkg/sql/catalog/catpb", diff --git a/pkg/kv/kvserver/asim/tests/output.go b/pkg/kv/kvserver/asim/tests/output.go index 0e5aae7cd29a..a4d5bc4f721b 100644 --- a/pkg/kv/kvserver/asim/tests/output.go +++ b/pkg/kv/kvserver/asim/tests/output.go @@ -18,6 +18,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/kv/kvserver/asim/gen" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/asim/scheduled" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/asim/state" + "github.com/cockroachdb/cockroach/pkg/kv/kvserver/asim/validator" ) // OutputFlags sets flags for what to output in tests. If you want to add a flag @@ -38,16 +39,17 @@ const ( // OutputTopology displays the topology of cluster configurations. OutputTopology // 1 << 3: 0000 1000 // OutputEvents displays delayed events executed. - OutputEvents // 1 << 4: 0001 0000 + OutputEvents // 1 << 4: 0001 0000 + OutputValidate // 1 << 5: 0010 0000 // OutputAll shows everything above. - OutputAll = (1 << (iota - 1)) - 1 // (1 << 5) - 1: 0001 1111 + OutputAll = (1 << (iota - 1)) - 1 // (1 << 6) - 1: 0011 1111 ) // ScanFlags converts an array of input strings into a single flag. 
func (o OutputFlags) ScanFlags(inputs []string) OutputFlags { dict := map[string]OutputFlags{"result_only": OutputResultOnly, "test_settings": OutputTestSettings, "initial_state": OutputInitialState, "config_gen": OutputConfigGen, "topology": OutputTopology, - "events": OutputEvents, "all": OutputAll} + "events": OutputEvents, "validate": OutputValidate, "all": OutputAll} flag := OutputResultOnly for _, input := range inputs { flag = flag.set(dict[input]) @@ -168,6 +170,9 @@ func (tr testResultsReport) String() string { if failed || tr.flags.Has(OutputEvents) { buf.WriteString(output.eventExecutor.PrintEventsExecuted()) } + if failed || tr.flags.Has(OutputValidate) { + buf.WriteString(validator.Validate(output.initialState, output.eventExecutor)) + } if failed { buf.WriteString(fmt.Sprintf("sample%d: failed assertion\n%s\n", nthSample, output.reason)) } else { diff --git a/pkg/kv/kvserver/asim/tests/rand_gen.go b/pkg/kv/kvserver/asim/tests/rand_gen.go index 9b88442bc506..7a45eb6def5a 100644 --- a/pkg/kv/kvserver/asim/tests/rand_gen.go +++ b/pkg/kv/kvserver/asim/tests/rand_gen.go @@ -404,7 +404,8 @@ func randomlySelectDataPlacement(randSource *rand.Rand) descpb.DataPlacement { // intervals defined by durationToAssert from the start time. These events apply // a randomly generated zone configuration followed by an assertion event. Note // that these random configurations might be unsatisfiable under the cluster -// setup. +// setup. To validate whether the configurations generated are satisfiable, +// please use "eval" [verbose=validate]. func generateRandomSurvivalGoalsEvents( regions []state.Region, startTime time.Time, diff --git a/pkg/kv/kvserver/asim/tests/rand_test.go b/pkg/kv/kvserver/asim/tests/rand_test.go index d21b00e3002c..fb6cfe173b1c 100644 --- a/pkg/kv/kvserver/asim/tests/rand_test.go +++ b/pkg/kv/kvserver/asim/tests/rand_test.go @@ -83,7 +83,8 @@ const ( // 4. sum of weights in the array should be equal to 1 // 3. 
"eval" [seed=] [num_iterations=] [duration=] -// [verbose=(<[]("result_only","test_settings","initial_state","config_gen","event","topology","all")>)] +// [verbose=(<[]("result_only","test_settings","initial_state","config_gen", +// "events","topology","validate","all")>)] // e.g. eval seed=20 duration=30m2s verbose=(test_settings,initial_state) // - eval: generates a simulation based on the configuration set with the given // commands. diff --git a/pkg/kv/kvserver/asim/tests/testdata/rand/default_settings b/pkg/kv/kvserver/asim/tests/testdata/rand/default_settings index 94d089ed8c15..f002c4098922 100644 --- a/pkg/kv/kvserver/asim/tests/testdata/rand/default_settings +++ b/pkg/kv/kvserver/asim/tests/testdata/rand/default_settings @@ -232,6 +232,8 @@ AU_EAST AU_EAST_1 └── [1 2 3] no events were scheduled +validation result: + valid sample1: pass ---------------------------------- sample2: start running @@ -247,6 +249,8 @@ AU_EAST AU_EAST_1 └── [1 2 3] no events were scheduled +validation result: + valid sample2: pass ---------------------------------- sample3: start running @@ -262,6 +266,8 @@ AU_EAST AU_EAST_1 └── [1 2 3] no events were scheduled +validation result: + valid sample3: pass ---------------------------------- diff --git a/pkg/kv/kvserver/asim/tests/testdata/rand/rand_cluster b/pkg/kv/kvserver/asim/tests/testdata/rand/rand_cluster index 05c46ce15325..ee9a8b4e2614 100644 --- a/pkg/kv/kvserver/asim/tests/testdata/rand/rand_cluster +++ b/pkg/kv/kvserver/asim/tests/testdata/rand/rand_cluster @@ -89,6 +89,8 @@ US_West US_West_1 └── [17 18] no events were scheduled +validation result: + valid sample1: pass ---------------------------------- sample2: start running @@ -125,6 +127,8 @@ US_West US_West_3 └── [21 22 23 24] no events were scheduled +validation result: + valid sample2: pass ---------------------------------- sample3: start running @@ -159,6 +163,8 @@ US_West US_West_1 └── [17 18] no events were scheduled +validation result: + valid sample3: pass 
---------------------------------- diff --git a/pkg/kv/kvserver/asim/tests/testdata/rand/rand_event b/pkg/kv/kvserver/asim/tests/testdata/rand/rand_event index f1b964a6de77..73aabb52f891 100644 --- a/pkg/kv/kvserver/asim/tests/testdata/rand/rand_event +++ b/pkg/kv/kvserver/asim/tests/testdata/rand/rand_event @@ -7,52 +7,10 @@ change_static_option ranges=1 rand_events type=cycle_via_random_survival_goals duration_to_assert_on_event=5m ---- -eval duration=60m num_iterations=1 verbose=(all) +eval duration=60m num_iterations=5 verbose=(events,validate) ---- -test settings - num_iterations=1 duration=1h0m0s ----------------------------------- -generating cluster configurations using randomized option - cluster_gen_type=multi_region -generating ranges configurations using static option - placement_type=even, ranges=1, key_space=200000, replication_factor=3, bytes=0 -generating load configurations using static option - rw_ratio=0.00, rate=0.00, min_block=1, max_block=1, min_key=1, max_key=200000, skewed_access=false -generating events configurations using randomized option - duration_to_assert_on_event=5m0s, type=cycle_via_random_survival_goals -generating settings configurations using static option ---------------------------------- sample1: start running -configurations generated using seed 7894140303635748408 - loaded cluster with - region:US_East [zone=US_East_1(nodes=1,stores=0), zone=US_East_2(nodes=2,stores=0), zone=US_East_3(nodes=3,stores=0), zone=US_East_4(nodes=10,stores=0)] - region:US_West [zone=US_West_1(nodes=2,stores=0)] - region:EU [zone=EU_1(nodes=3,stores=0), zone=EU_2(nodes=3,stores=0), zone=EU_3(nodes=4,stores=0)] - basic ranges with placement_type=even, ranges=1, key_space=200000, replication_factor=3, bytes=0 - basic load with rw_ratio=0.00, rate=0.00, skewed_access=false, min_block_size=1, max_block_size=1, min_key=1, max_key=200000 - number of mutation events=12, number of assertion events=12 -initial state at 2022-03-21 11:00:00: - 
stores(28)=[s1n1=(replicas(0)),s2n2=(replicas(0)),s3n3=(replicas(0)),s4n4=(replicas(1)),s5n5=(replicas(0)),s6n6=(replicas(0)),s7n7=(replicas(0)),s8n8=(replicas(0)),s9n9=(replicas(0)),s10n10=(replicas(0)),s11n11=(replicas(0)),s12n12=(replicas(0)),s13n13=(replicas(0)),s14n14=(replicas(0)),s15n15=(replicas(0)),s16n16=(replicas(0)),s17n17=(replicas(1)),s18n18=(replicas(0)),s19n19=(replicas(0)),s20n20=(replicas(0)),s21n21=(replicas(0)),s22n22=(replicas(1)),s23n23=(replicas(0)),s24n24=(replicas(0)),s25n25=(replicas(1)),s26n26=(replicas(1)),s27n27=(replicas(0)),s28n28=(replicas(0))] -topology: -EU - EU_1 - │ └── [19 20 21] - EU_2 - │ └── [22 23 24] - EU_3 - │ └── [25 26 27 28] -US_East - US_East_1 - │ └── [1] - US_East_2 - │ └── [2 3] - US_East_3 - │ └── [4 5 6] - US_East_4 - │ └── [7 8 9 10 11 12 13 14 15 16] -US_West - US_West_1 - └── [17 18] 24 events executed: executed at: 2022-03-21 11:00:00 event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:5 num_voters:3 constraints: > constraints: > constraints: > voter_constraints: > lease_preferences: > @@ -129,5 +87,342 @@ violating constraints: event: assertion checking event 1. assertion=conformance unavailable=0 under=0 over=0 violating=0 passed +validation result: + event scheduled at 2022-03-21 11:20:00 is expected to lead to failure + unsatisfiable: failed to satisfy constraints for region US_West sample1: pass ---------------------------------- +sample2: start running +24 events executed: + executed at: 2022-03-21 11:00:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:5 num_voters:5 voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:05:00 + event: assertion checking event + 1. 
assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed + executed at: 2022-03-21 11:05:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:5 num_voters:3 constraints: > constraints: > constraints: > voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:10:00 + event: assertion checking event + 1. assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed + executed at: 2022-03-21 11:10:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:5 num_voters:3 constraints: > constraints: > constraints: > voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:15:00 + event: assertion checking event + 1. assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed + executed at: 2022-03-21 11:15:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:5 num_voters:5 constraints: > constraints: > constraints: > voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:20:00 + event: assertion checking event + 1. assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed + executed at: 2022-03-21 11:20:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:5 num_voters:5 voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:25:00 + event: assertion checking event + 1. 
assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed + executed at: 2022-03-21 11:25:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:5 num_voters:3 constraints: > constraints: > constraints: > voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:30:00 + event: assertion checking event + 1. assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed + executed at: 2022-03-21 11:30:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:3 num_voters:3 voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:35:00 + event: assertion checking event + 1. assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed + executed at: 2022-03-21 11:35:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:3 num_voters:3 voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:40:00 + event: assertion checking event + 1. assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed + executed at: 2022-03-21 11:40:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:5 num_voters:5 voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:45:00 + event: assertion checking event + 1. 
assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed + executed at: 2022-03-21 11:45:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:5 num_voters:3 constraints: > constraints: > constraints: > voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:50:00 + event: assertion checking event + 1. assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed + executed at: 2022-03-21 11:50:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:3 num_voters:3 voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:55:00 + event: assertion checking event + 1. assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed + executed at: 2022-03-21 11:55:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:5 num_voters:5 constraints: > constraints: > constraints: > voter_constraints: > lease_preferences: > + executed at: 2022-03-21 12:00:00 + event: assertion checking event + 1. assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed +validation result: + valid +sample2: pass +---------------------------------- +sample3: start running +24 events executed: + executed at: 2022-03-21 11:00:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:5 num_voters:5 constraints: > constraints: > constraints: > voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:05:00 + event: assertion checking event + 1. 
assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed + executed at: 2022-03-21 11:05:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:5 num_voters:5 voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:10:00 + event: assertion checking event + 1. assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed + executed at: 2022-03-21 11:10:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:5 num_voters:3 constraints: > constraints: > constraints: > voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:15:00 + event: assertion checking event + 1. assertion=conformance unavailable=0 under=0 over=0 violating=0 + failed: conformance unavailable=0 under=0 over=0 violating=0 + actual unavailable=0 under=0, over=0 violating=1 +violating constraints: + r1:{0000000000-9999999999} [(n1,s1):1, (n17,s17):4, (n18,s18):9, (n13,s13):7NON_VOTER, (n28,s28):10NON_VOTER] applying num_replicas=5 num_voters=3 constraints=[+region=US_East:1 +region=US_West:1 +region=EU:1] voter_constraints=[+region=US_West] lease_preferences=[+region=US_West] + executed at: 2022-03-21 11:15:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:3 num_voters:3 voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:20:00 + event: assertion checking event + 1. 
assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed + executed at: 2022-03-21 11:20:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:5 num_voters:3 constraints: > constraints: > constraints: > voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:25:00 + event: assertion checking event + 1. assertion=conformance unavailable=0 under=0 over=0 violating=0 + failed: conformance unavailable=0 under=0 over=0 violating=0 + actual unavailable=0 under=0, over=0 violating=1 +violating constraints: + r1:{0000000000-9999999999} [(n17,s17):18, (n23,s23):13, (n18,s18):16, (n7,s7):14NON_VOTER, (n19,s19):17NON_VOTER] applying num_replicas=5 num_voters=3 constraints=[+region=US_East:1 +region=US_West:1 +region=EU:1] voter_constraints=[+region=US_West] lease_preferences=[+region=US_West] + executed at: 2022-03-21 11:25:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:5 num_voters:5 voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:30:00 + event: assertion checking event + 1. assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed + executed at: 2022-03-21 11:30:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:3 num_voters:3 voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:35:00 + event: assertion checking event + 1. 
assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed + executed at: 2022-03-21 11:35:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:5 num_voters:3 constraints: > constraints: > constraints: > voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:40:00 + event: assertion checking event + 1. assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed + executed at: 2022-03-21 11:40:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:3 num_voters:3 voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:45:00 + event: assertion checking event + 1. assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed + executed at: 2022-03-21 11:45:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:5 num_voters:3 constraints: > constraints: > constraints: > voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:50:00 + event: assertion checking event + 1. assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed + executed at: 2022-03-21 11:50:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:3 num_voters:3 voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:55:00 + event: assertion checking event + 1. 
assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed + executed at: 2022-03-21 11:55:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:5 num_voters:5 voter_constraints: > lease_preferences: > + executed at: 2022-03-21 12:00:00 + event: assertion checking event + 1. assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed +validation result: + event scheduled at 2022-03-21 11:10:00 is expected to lead to failure + unsatisfiable: failed to satisfy constraints for region US_West + event scheduled at 2022-03-21 11:20:00 is expected to lead to failure + unsatisfiable: failed to satisfy constraints for region US_West +sample3: pass +---------------------------------- +sample4: start running +24 events executed: + executed at: 2022-03-21 11:00:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:5 num_voters:5 voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:05:00 + event: assertion checking event + 1. assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed + executed at: 2022-03-21 11:05:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:3 num_voters:3 voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:10:00 + event: assertion checking event + 1. assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed + executed at: 2022-03-21 11:10:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:5 num_voters:3 constraints: > constraints: > constraints: > voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:15:00 + event: assertion checking event + 1. 
assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed + executed at: 2022-03-21 11:15:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:5 num_voters:5 voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:20:00 + event: assertion checking event + 1. assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed + executed at: 2022-03-21 11:20:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:5 num_voters:5 constraints: > constraints: > constraints: > voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:25:00 + event: assertion checking event + 1. assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed + executed at: 2022-03-21 11:25:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:5 num_voters:5 constraints: > constraints: > constraints: > voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:30:00 + event: assertion checking event + 1. assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed + executed at: 2022-03-21 11:30:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:3 num_voters:3 voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:35:00 + event: assertion checking event + 1. 
assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed + executed at: 2022-03-21 11:35:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:3 num_voters:3 voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:40:00 + event: assertion checking event + 1. assertion=conformance unavailable=0 under=0 over=0 violating=0 + failed: conformance unavailable=0 under=0 over=0 violating=0 + actual unavailable=0 under=0, over=0 violating=1 +violating constraints: + r1:{0000000000-9999999999} [(n26,s26):12, (n17,s17):17, (n18,s18):18] applying num_voters=3 voter_constraints=[+region=US_West] lease_preferences=[+region=US_West] + executed at: 2022-03-21 11:40:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:5 num_voters:5 voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:45:00 + event: assertion checking event + 1. assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed + executed at: 2022-03-21 11:45:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:5 num_voters:5 voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:50:00 + event: assertion checking event + 1. assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed + executed at: 2022-03-21 11:50:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:3 num_voters:3 voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:55:00 + event: assertion checking event + 1. 
assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed + executed at: 2022-03-21 11:55:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:5 num_voters:5 voter_constraints: > lease_preferences: > + executed at: 2022-03-21 12:00:00 + event: assertion checking event + 1. assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed +validation result: + event scheduled at 2022-03-21 11:35:00 is expected to lead to failure + unsatisfiable: failed to satisfy constraints for region US_West +sample4: pass +---------------------------------- +sample5: start running +24 events executed: + executed at: 2022-03-21 11:00:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:5 num_voters:5 constraints: > constraints: > constraints: > voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:05:00 + event: assertion checking event + 1. assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed + executed at: 2022-03-21 11:05:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:5 num_voters:5 voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:10:00 + event: assertion checking event + 1. assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed + executed at: 2022-03-21 11:10:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:3 num_voters:3 voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:15:00 + event: assertion checking event + 1. 
assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed + executed at: 2022-03-21 11:15:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:3 num_voters:3 voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:20:00 + event: assertion checking event + 1. assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed + executed at: 2022-03-21 11:20:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:5 num_voters:5 voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:25:00 + event: assertion checking event + 1. assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed + executed at: 2022-03-21 11:25:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:5 num_voters:5 constraints: > constraints: > constraints: > voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:30:00 + event: assertion checking event + 1. assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed + executed at: 2022-03-21 11:30:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:5 num_voters:5 voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:35:00 + event: assertion checking event + 1. assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed + executed at: 2022-03-21 11:35:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:3 num_voters:3 voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:40:00 + event: assertion checking event + 1. 
assertion=conformance unavailable=0 under=0 over=0 violating=0 + failed: conformance unavailable=0 under=0 over=0 violating=0 + actual unavailable=0 under=0, over=0 violating=1 +violating constraints: + r1:{0000000000-9999999999} [(n17,s17):15, (n8,s8):12, (n18,s18):16] applying num_voters=3 voter_constraints=[+region=US_West] lease_preferences=[+region=US_West] + executed at: 2022-03-21 11:40:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:3 num_voters:3 voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:45:00 + event: assertion checking event + 1. assertion=conformance unavailable=0 under=0 over=0 violating=0 + failed: conformance unavailable=0 under=0 over=0 violating=0 + actual unavailable=0 under=0, over=0 violating=1 +violating constraints: + r1:{0000000000-9999999999} [(n17,s17):15, (n8,s8):12, (n18,s18):16] applying num_voters=3 voter_constraints=[+region=US_West] lease_preferences=[+region=US_West] + executed at: 2022-03-21 11:45:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:5 num_voters:5 constraints: > constraints: > constraints: > voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:50:00 + event: assertion checking event + 1. assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed + executed at: 2022-03-21 11:50:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:5 num_voters:3 constraints: > constraints: > constraints: > voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:55:00 + event: assertion checking event + 1. 
assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed + executed at: 2022-03-21 11:55:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:5 num_voters:5 constraints: > constraints: > constraints: > voter_constraints: > lease_preferences: > + executed at: 2022-03-21 12:00:00 + event: assertion checking event + 1. assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed +validation result: + event scheduled at 2022-03-21 11:35:00 is expected to lead to failure + unsatisfiable: failed to satisfy constraints for region US_West + event scheduled at 2022-03-21 11:40:00 is expected to lead to failure + unsatisfiable: failed to satisfy constraints for region US_West +sample5: pass +---------------------------------- diff --git a/pkg/kv/kvserver/asim/tests/testdata/rand/rand_ranges b/pkg/kv/kvserver/asim/tests/testdata/rand/rand_ranges index 5477afe44a1d..bf80ac466c0a 100644 --- a/pkg/kv/kvserver/asim/tests/testdata/rand/rand_ranges +++ b/pkg/kv/kvserver/asim/tests/testdata/rand/rand_ranges @@ -188,6 +188,8 @@ US US_3 └── [11 12 13 14 15] no events were scheduled +validation result: + valid sample2: failed assertion conformance unavailable=0 under=0 over=0 violating=0 actual unavailable=0 under=0, over=9 violating=0 diff --git a/pkg/kv/kvserver/asim/validator/BUILD.bazel b/pkg/kv/kvserver/asim/validator/BUILD.bazel new file mode 100644 index 000000000000..76d333642dad --- /dev/null +++ b/pkg/kv/kvserver/asim/validator/BUILD.bazel @@ -0,0 +1,30 @@ +load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") + +go_library( + name = "validator", + srcs = [ + "config_validator.go", + "validator.go", + ], + importpath = "github.com/cockroachdb/cockroach/pkg/kv/kvserver/asim/validator", + visibility = ["//visibility:public"], + deps = [ + "//pkg/kv/kvserver/asim/event", + "//pkg/kv/kvserver/asim/scheduled", + "//pkg/kv/kvserver/asim/state", + 
"//pkg/roachpb", + "@com_github_cockroachdb_errors//:errors", + ], +) + +go_test( + name = "validator_test", + srcs = ["validator_test.go"], + args = ["-test.timeout=295s"], + embed = [":validator"], + deps = [ + "//pkg/kv/kvserver/asim/state", + "//pkg/spanconfig/spanconfigtestutils", + "@com_github_stretchr_testify//require", + ], +) diff --git a/pkg/kv/kvserver/asim/validator/config_validator.go b/pkg/kv/kvserver/asim/validator/config_validator.go new file mode 100644 index 000000000000..67d31ff94323 --- /dev/null +++ b/pkg/kv/kvserver/asim/validator/config_validator.go @@ -0,0 +1,385 @@ +// Copyright 2023 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package validator + +import ( + "math" + + "github.com/cockroachdb/cockroach/pkg/kv/kvserver/asim/state" + "github.com/cockroachdb/cockroach/pkg/roachpb" + "github.com/cockroachdb/errors" +) + +// processClusterInfo handles region data and returns: 1. A map of zone names to +// their respective region names 2. A map of zone names to the number of +// available nodes in the zones 3. 
A map of region names to the number of +// available nodes in the regions +func processClusterInfo( + regions []state.Region, +) (map[string]string, map[string]int, map[string]int, int) { + zone := map[string]int{} + region := map[string]int{} + total := 0 + zoneToRegion := map[string]string{} + + for _, r := range regions { + for _, z := range r.Zones { + zoneToRegion[z.Name] = r.Name + zone[z.Name] += z.NodeCount + region[r.Name] += z.NodeCount + total += z.NodeCount + } + } + return zoneToRegion, zone, region, total +} + +type allocationDetailsAtEachLevel struct { + unassigned int + assignedVoters int + assignedNonVoters int +} + +// tryToAddVoters attempts to assign numOfVoters from the available nodes as +// voters. It returns true if there are sufficient available nodes to be +// assigned as voters, and false otherwise. +func (a *allocationDetailsAtEachLevel) tryToAddVoters(numOfVoters int) (success bool) { + if a.unassigned < numOfVoters { + return false + } + a.unassigned -= numOfVoters + a.assignedVoters += numOfVoters + return true +} + +// tryToAddNonVoters attempts to assign numOfNonVoters from the available nodes +// as nonvoters. It returns true if there are sufficient available nodes to be +// assigned as voters, and false otherwise. +func (a *allocationDetailsAtEachLevel) tryToAddNonVoters(numOfNonVoters int) (success bool) { + if a.unassigned < numOfNonVoters { + return false + } + a.unassigned -= numOfNonVoters + a.assignedNonVoters += numOfNonVoters + return true +} + +// promoteNonVoters promotes numOfNonVotersToPromote of nonvoters to voters. +func (a *allocationDetailsAtEachLevel) promoteNonVoters(numOfNonVotersToPromote int) { + if a.assignedNonVoters < numOfNonVotersToPromote { + panic("insufficient non-voters for promotion. 
This is unexpected as computeNecessaryChanges " + + "should calculate number of non-voters for promotion correctly.") + } + a.assignedNonVoters -= numOfNonVotersToPromote + a.assignedVoters += numOfNonVotersToPromote +} + +type mockAllocator struct { + zoneToRegion map[string]string + zone map[string]allocationDetailsAtEachLevel + region map[string]allocationDetailsAtEachLevel + cluster allocationDetailsAtEachLevel +} + +// newMockAllocator creates a mock allocator based on the provided cluster +// setup. mockAllocator is designed to determine if a config can be satisfied by +// trying to assign replicas in a way that meet the constraints. Note that since +// isSatisfiable directly alters mockAlloactor fields, a new mock allocator +// should be initialized for each isSatisfiable call. +func newMockAllocator( + zoneToRegion map[string]string, zone map[string]int, region map[string]int, total int, +) mockAllocator { + m := mockAllocator{ + zoneToRegion: zoneToRegion, + zone: map[string]allocationDetailsAtEachLevel{}, + region: map[string]allocationDetailsAtEachLevel{}, + cluster: allocationDetailsAtEachLevel{ + unassigned: total, + }, + } + + for k, v := range zone { + m.zone[k] = allocationDetailsAtEachLevel{ + unassigned: v, + } + } + + for k, v := range region { + m.region[k] = allocationDetailsAtEachLevel{ + unassigned: v, + } + } + return m +} + +type constraint struct { + requiredReplicas int + requiredVoters int +} + +// validateConstraint returns nil if the constraint is feasible and error +// (not `nil`) otherwise. 
+func (m *mockAllocator) validateConstraint(c roachpb.Constraint) error { + if c.Type == roachpb.Constraint_PROHIBITED { + return errors.New("constraints marked as Constraint_PROHIBITED are unsupported") + } + switch c.Key { + case "zone": + _, ok := m.zone[c.Value] + if !ok { + return errors.Newf("zone constraint value %s is not found in the cluster set up", c.Value) + } + case "region": + _, ok := m.region[c.Value] + if !ok { + return errors.Newf("region constraint value %s is not found in the cluster set up", c.Value) + } + default: + return errors.New("only zone and region constraint keys are supported") + } + return nil +} + +// processConstraints validates and extracts region and zone-specific replica +// and voter constraints, storing them in two separate maps. If certain +// constraints fail the validation, they are considered as infeasible. In such +// cases, error(not `nil`) will be returned. +func (m *mockAllocator) processConstraints( + config roachpb.SpanConfig, +) (zoneConstraints map[string]constraint, regionConstraints map[string]constraint, err error) { + zoneConstraints = map[string]constraint{} + regionConstraints = map[string]constraint{} + totalVoters := int(config.GetNumVoters()) + for _, voterConstraint := range config.VoterConstraints { + requiredVoters := int(voterConstraint.NumReplicas) + if voterConstraint.NumReplicas == 0 { + // If NumReplicas is zero, the constraints are applied to all voters. 
+ requiredVoters = totalVoters + } + for _, vc := range voterConstraint.Constraints { + if err := m.validateConstraint(vc); err != nil { + return map[string]constraint{}, map[string]constraint{}, err + } + if vc.Key == "zone" { + zc := zoneConstraints[vc.Value] + zc.requiredVoters = requiredVoters + zoneConstraints[vc.Value] = zc + } else if vc.Key == "region" { + rc := regionConstraints[vc.Value] + rc.requiredVoters = requiredVoters + regionConstraints[vc.Value] = rc + } + } + } + + totalReplicas := int(config.NumReplicas) + for _, replicaConstraint := range config.Constraints { + requiredReplicas := int(replicaConstraint.NumReplicas) + if replicaConstraint.NumReplicas == 0 { + // If NumReplicas is zero, the constraints are applied to all replicas. + requiredReplicas = totalReplicas + } + for _, vc := range replicaConstraint.Constraints { + if err := m.validateConstraint(vc); err != nil { + return map[string]constraint{}, map[string]constraint{}, err + } + if vc.Key == "zone" { + zc := zoneConstraints[vc.Value] + zc.requiredReplicas = requiredReplicas + zoneConstraints[vc.Value] = zc + } else if vc.Key == "region" { + rc := regionConstraints[vc.Value] + rc.requiredReplicas = requiredReplicas + regionConstraints[vc.Value] = rc + } + } + } + return zoneConstraints, regionConstraints, nil +} + +// computeNecessaryChanges computes the necessary minimal changes needed for a +// level to satisfy the constraints, considering the existing number of voters +// and non-voters, as well as the required number of voters and replicas. +func computeNecessaryChanges( + existingVoters int, existingNonVoters int, requiredVoters int, requiredReplicas int, +) (nonVotersToPromote int, votersToAdd int, nonVotersToAdd int) { + votersToAdd = int(math.Max(0, float64(requiredVoters-existingVoters))) + // Try to promote existing nonvoters to voters to satisfy voter constraints + // first. 
+ nonVotersToPromote = int(math.Min(float64(existingNonVoters), float64(votersToAdd))) + // Adjust existing voter and nonvoter count based on promotion. + existingVotersAfterPromotion := existingVoters + nonVotersToPromote + existingNonVotersAfterPromotion := existingNonVoters - nonVotersToPromote + votersToAdd = int(math.Max(0, float64(requiredVoters-existingVotersAfterPromotion))) + nonVotersToAdd = int(math.Max(0, float64(requiredReplicas-requiredVoters-existingNonVotersAfterPromotion))) + return nonVotersToPromote, votersToAdd, nonVotersToAdd +} + +// applyAtRegionLevel attempts to apply the desired changes (nonVotersToPromote, +// votersToAdd, nonVotersToAdd) at the provided region (specified by +// regionName). If enough nodes are available, it makes the changes and returns +// true. Otherwise, it returns false. +func (m *mockAllocator) applyAtRegionLevel( + regionName string, nonVotersToPromote int, votersToAdd int, nonVotersToAdd int, +) bool { + existing, ok := m.region[regionName] + if !ok { + panic("unknown region name in the region constraint. " + + "This is unexpected as validateConstraint should have validated it beforehand.") + } + + existing.promoteNonVoters(nonVotersToPromote) + success := existing.tryToAddVoters(votersToAdd) && existing.tryToAddNonVoters(nonVotersToAdd) + m.region[regionName] = existing + return success +} + +// applyAtClusterLevel attempts to apply the desired changes +// (nonVotersToPromote, votersToAdd, nonVotersToAdd) at the cluster level. If +// enough nodes are available, it makes the changes and returns true. Otherwise, +// it returns false. 
+func (m *mockAllocator) applyAtClusterLevel( + nonVotersToPromote int, votersToAdd int, nonVotersToAdd int, +) bool { + m.cluster.promoteNonVoters(nonVotersToPromote) + return m.cluster.tryToAddVoters(votersToAdd) && m.cluster.tryToAddNonVoters(nonVotersToAdd) +} + +// applyAtZoneLevel attempts to apply the desired changes (nonVotersToPromote, +// votersToAdd, nonVotersToAdd) at the provided zone (specified by zoneName). If +// enough nodes are available, it makes the changes and returns true. Otherwise, +// it returns false. +func (m *mockAllocator) applyAtZoneLevel( + zoneName string, nonVotersToPromote int, votersToAdd int, nonVotersToAdd int, +) bool { + existing, ok := m.zone[zoneName] + if !ok { + panic("unknown zone name in the zone constraint. " + + "This is unexpected as validateConstraint should have validated it beforehand.") + } + existing.promoteNonVoters(nonVotersToPromote) + success := existing.tryToAddVoters(votersToAdd) && existing.tryToAddNonVoters(nonVotersToAdd) + m.zone[zoneName] = existing + return success +} + +// tryToSatisfyRegionConstraint checks whether the allocator can assign voters +// and replicas in a manner that meets the specified required voters and +// replicas for the region. If possible, it makes the necessary assignment, +// updates the allocator, and returns true. Otherwise, it returns false. +func (m *mockAllocator) tryToSatisfyRegionConstraint( + regionName string, requiredVoters int, requiredReplicas int, +) bool { + existing, ok := m.region[regionName] + if !ok { + panic("unknown region name in the region constraint. " + + "This is unexpected as validateConstraint should have validated it beforehand.") + } + nonVotersToPromote, votersToAdd, nonVotersToAdd := computeNecessaryChanges(existing.assignedVoters, existing.assignedNonVoters, requiredVoters, requiredReplicas) + if nonVotersToPromote == 0 && votersToAdd == 0 && nonVotersToAdd == 0 { + return true + } + // Propagate the changes to region and cluster. 
+ return m.applyAtRegionLevel(regionName, nonVotersToPromote, votersToAdd, nonVotersToAdd) && + m.applyAtClusterLevel(nonVotersToPromote, votersToAdd, nonVotersToAdd) +} + +// tryToSatisfyZoneConstraint checks whether the allocator can assign voters and +// replicas in a manner that meets the specified required voters and replicas +// for the zone. If possible, it makes the necessary assignment, updates the +// allocator, and returns true. Otherwise, it returns false. +func (m *mockAllocator) tryToSatisfyZoneConstraint( + zoneName string, requiredVoters int, requiredReplicas int, +) bool { + existing, ok := m.zone[zoneName] + if !ok { + panic("unknown zone name in the zone constraint. " + + "This is unexpected as validateConstraint should have validated it beforehand.") + } + nonVotersToPromote, votersToAdd, nonVotersToAdd := computeNecessaryChanges(existing.assignedVoters, existing.assignedNonVoters, requiredVoters, requiredReplicas) + if nonVotersToPromote == 0 && votersToAdd == 0 && nonVotersToAdd == 0 { + return true + } + // Propagate the changes to zone, region and cluster. + return m.applyAtZoneLevel(zoneName, nonVotersToPromote, votersToAdd, nonVotersToAdd) && + m.applyAtRegionLevel(m.zoneToRegion[zoneName], nonVotersToPromote, votersToAdd, nonVotersToAdd) && + m.applyAtClusterLevel(nonVotersToPromote, votersToAdd, nonVotersToAdd) +} + +// tryToSatisfyClusterConstraint checks whether the allocator can assign voters +// and replicas in a manner that meets the specified required voters and +// replicas for the cluster. If possible, it makes the necessary assignment, +// updates the allocator, and returns true. Otherwise, it returns false. 
+func (m *mockAllocator) tryToSatisfyClusterConstraint( + requiredVoters int, requiredReplicas int, +) bool { + existing := m.cluster + if existing.assignedVoters > requiredVoters || existing.assignedNonVoters+existing.assignedVoters > requiredReplicas { + // Impossible to satisfy since minimal voters or replicas needed exceed + // required number of voters and replicas. + return false + } + nonVotersToPromote, votersToAdd, nonVotersToAdd := computeNecessaryChanges(existing.assignedVoters, existing.assignedNonVoters, requiredVoters, requiredReplicas) + if nonVotersToPromote == 0 && votersToAdd == 0 && nonVotersToAdd == 0 { + return true + } + // Propagate the changes to cluster. + return m.applyAtClusterLevel(nonVotersToPromote, votersToAdd, nonVotersToAdd) +} + +// isSatisfiable is a method that assesses whether a given configuration is +// satisfiable within the cluster used to initialize the mockAllocator. It +// returns (true, nil) for satisfiable configurations and (false, reason) for +// unsatisfiable configurations. mockAllocator tries to allocate voters and +// nonvoters across nodes in a manner that satisfies the constraints. If no such +// allocation can be found, the constraint is considered unsatisfiable. The +// allocation is found through the following process: +// 1. Preprocess the config constraints to store replica and voter constraints +// specific to the zone and region in two maps. +// 2. Try to satisfy zone constraints first, region constraints next, and +// cluster constraints in the end. As we allocate replicas for zone constraints, +// some region constraints are also satisfied. +// 3. While trying to satisfy constraints at each hierarchical level, we +// allocate voters or replicas specific to the zone or region only when +// necessary. It first promotes non-voters to voters when possible as voters are +// also replicas and can satisfy both constraints. Additional voters and +// non-voters are then assigned as needed. 
If any zones or regions lack +// available nodes for assignment, the constraint is considered as +// unsatisfiable. +// +// Limitation: +// - leaseholder preference are not checked and treated as satisfiable. - +// constraints with a key other than zone and region are unsatisfiable. - +// constraints with a value that does not correspond to a known zone or region +// in the cluster setup are unsatisfiable. +// - constraints labeled as Constraint_PROHIBITED are considered unsatisfiable. +func (m *mockAllocator) isSatisfiable(config roachpb.SpanConfig) (success bool, err error) { + zoneConstraints, regionConstraints, err := m.processConstraints(config) + if err != nil { + return false, err + } + + for zoneName, zc := range zoneConstraints { + if !m.tryToSatisfyZoneConstraint(zoneName, zc.requiredVoters, zc.requiredReplicas) { + return false, errors.Newf("failed to satisfy constraints for zone %s", zoneName) + } + } + + for regionName, rc := range regionConstraints { + if !m.tryToSatisfyRegionConstraint(regionName, rc.requiredVoters, rc.requiredReplicas) { + return false, errors.Newf("failed to satisfy constraints for region %s", regionName) + } + } + + if !m.tryToSatisfyClusterConstraint(int(config.GetNumVoters()), int(config.NumReplicas)) { + return false, errors.Newf("failed to satisfy constraints for cluster") + } + return true, nil +} diff --git a/pkg/kv/kvserver/asim/validator/validator.go b/pkg/kv/kvserver/asim/validator/validator.go new file mode 100644 index 000000000000..ef9f1ab826b6 --- /dev/null +++ b/pkg/kv/kvserver/asim/validator/validator.go @@ -0,0 +1,49 @@ +// Copyright 2023 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. 
+ +package validator + +import ( + "fmt" + "strings" + + "github.com/cockroachdb/cockroach/pkg/kv/kvserver/asim/event" + "github.com/cockroachdb/cockroach/pkg/kv/kvserver/asim/scheduled" + "github.com/cockroachdb/cockroach/pkg/kv/kvserver/asim/state" +) + +// Validate checks for any invalid events. Currently, it only checks +// SetSpanConfigEvent for the presence of unsatisfiable configurations. But it +// can be extended to validate the initial state and other events as well. +func Validate(initialState state.State, events scheduled.EventExecutor) string { + buf := strings.Builder{} + buf.WriteString("validation result:\n") + failed := false + + // Since all constraint checks utilize the same cluster info, we process the + // cluster info once and reuse it. + zoneToRegion, zone, region, total := processClusterInfo(initialState.ClusterInfo().Regions) + for _, se := range events.ScheduledEvents() { + if e, ok := se.TargetEvent.(event.SetSpanConfigEvent); ok { + // Create a new mockAllocator for every constraint satisfiability check as + // isSatisfiable directly modify mockAllocator fields. + ma := newMockAllocator(zoneToRegion, zone, region, total) + if success, reason := ma.isSatisfiable(e.Config); !success { + failed = true + buf.WriteString(fmt.Sprintf("\tevent scheduled at %s is expected to lead to failure\n", se.At.Format("2006-01-02 15:04:05"))) + buf.WriteString(fmt.Sprintf("\t\tunsatisfiable: %s\n", reason)) + } + } + } + if !failed { + buf.WriteString("\tvalid\n") + } + return buf.String() +} diff --git a/pkg/kv/kvserver/asim/validator/validator_test.go b/pkg/kv/kvserver/asim/validator/validator_test.go new file mode 100644 index 000000000000..2482653c16f4 --- /dev/null +++ b/pkg/kv/kvserver/asim/validator/validator_test.go @@ -0,0 +1,174 @@ +// Copyright 2023 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. 
+// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package validator + +import ( + "testing" + + "github.com/cockroachdb/cockroach/pkg/kv/kvserver/asim/state" + "github.com/cockroachdb/cockroach/pkg/spanconfig/spanconfigtestutils" + "github.com/stretchr/testify/require" +) + +// TestValidator validates the correctness of span configuration satisfiability +// check in Validator. +func TestValidator(t *testing.T) { + zoneToRegion, zone, region, total := processClusterInfo(state.ComplexConfig.Regions) + testCases := []struct { + description string + constraint string + expectedSuccess bool + expectedErrorMsgStr string + }{ + { + description: "straightforward valid configuration", + constraint: "num_replicas=5 num_voters=5 " + + "constraints={'+region=US_East':3,'+region=US_West':1,'+region=EU':1} " + + "voter_constraints={'+region=US_East':3,'+region=US_West':1,'+region=EU':1}", + expectedSuccess: true, + expectedErrorMsgStr: "", + }, + { + description: "promotion to satisfy region voter constraint", + constraint: "num_replicas=2 num_voters=2 " + + "constraints={'+zone=US_West_1':2} voter_constraints={'+region=US_West':2}", + expectedSuccess: true, + expectedErrorMsgStr: "", + }, + { + description: "promotion to satisfy cluster constraint", + constraint: "num_replicas=2 num_voters=2 " + + "constraints={'+zone=US_West_1':2}", + expectedSuccess: true, + expectedErrorMsgStr: "", + }, + { + description: "promoting partial nonvoters to voters", + constraint: "num_replicas=6 num_voters=3 constraints={'+zone=US_East_3':3} " + + "voter_constraints={'+region=US_East':3,'+zone=US_East_2':2}", + expectedSuccess: true, + expectedErrorMsgStr: "", + }, + { + description: "satisfying zone constraint can help satisfy region constraint", + constraint: "num_replicas=2 " + + 
"constraints={'+zone=US_West_1':2,'+region=US_West':2}", + expectedSuccess: true, + expectedErrorMsgStr: "", + }, + { + description: "cluster is fully assigned by region constraints", + constraint: "num_replicas=28 num_voters=28 " + + "constraints={'+region=US_East':16,'+region=US_West':2,'+region=EU':10}", + expectedSuccess: true, + expectedErrorMsgStr: "", + }, + { + description: "cluster is fully assigned by region and zone constraints", + constraint: "num_replicas=28 num_voters=28 " + + "constraints={'+region=US_East':16,'+region=US_West':2,'+region=EU':10," + + "'+zone=US_East_1':1,'+zone=US_East_2':2,'+zone=US_East_3':3,'+zone=US_East_4':10,'+zone=US_West_1':2," + + "'+zone=EU_1':3,'+zone=EU_2':3,'+zone=EU_3':4}" + + "voter_constraints={'+region=US_East':16,'+region=US_West':2,'+region=EU':10," + + "'+zone=US_East_1':1,'+zone=US_East_2':2,'+zone=US_East_3':3,'+zone=US_East_4':10,'+zone=US_West_1':2," + + "'+zone=EU_1':3,'+zone=EU_2':3,'+zone=EU_3':4}", + expectedSuccess: true, + expectedErrorMsgStr: "", + }, + { + description: "configuration for issue #106559", + constraint: "num_replicas=6 num_voters=5 " + + "constraints={'+zone=US_West_1':1,'+zone=EU_1':1,'+zone=US_East_2':2,'+zone=US_East_3':2} " + + "voter_constraints={'+zone=US_West_1':1,'+zone=EU_1':1,'+zone=US_East_2':2,'+zone=US_East_3':1}", + expectedSuccess: true, + expectedErrorMsgStr: "", + }, + { + description: "configuration for issue #106559", + constraint: "num_replicas=6 num_voters=5 " + + "constraints={'+zone=US_West_1':1,'+zone=EU_1':1,'+zone=US_East_2':1,'+zone=US_East_3':1} " + + "voter_constraints={'+zone=US_West_1':2,'+zone=US_East_2':2}", + expectedSuccess: true, + expectedErrorMsgStr: "", + }, + { + description: "no voters or replicas needed to add for constraints", + constraint: "num_replicas=0 constraints={'+zone=US_East_1':0}", + expectedSuccess: true, + expectedErrorMsgStr: "", + }, + { + description: "insufficient replicas for region constraint", + constraint: "num_replicas=28 
num_voters=28 " + + "constraints={'+region=US_East':17,'+region=US_West':2,'+region=EU':10}", + expectedSuccess: false, + expectedErrorMsgStr: "failed to satisfy constraints for region US_East", + }, + { + description: "too many replicas for cluster constraint", + constraint: "num_replicas=10 num_voters=28 " + + "constraints={'+region=US_East':16,'+region=US_West':2,'+region=EU':10}", + expectedSuccess: false, + expectedErrorMsgStr: "failed to satisfy constraints for cluster", + }, + { + description: "too many voters for cluster constraint", + constraint: "num_replicas=28 num_voters=10 " + + "voter_constraints={'+region=US_East':16,'+region=US_West':2,'+region=EU':10}", + expectedSuccess: false, + expectedErrorMsgStr: "failed to satisfy constraints for cluster", + }, + { + description: "zero NumReplicas should use total num_replicas, num_voters for constraints", + constraint: "num_replicas=5 num_voters=3 " + + "constraints={'+region=US_East'} voter_constraints={'+region=US_West'}", + expectedSuccess: false, + expectedErrorMsgStr: "failed to satisfy constraints for region US_West", + }, + { + description: "unsupported constraint key", + constraint: "num_replicas=5 constraints={'+az=US_East'}", + expectedSuccess: false, + expectedErrorMsgStr: "only zone and region constraint keys are supported", + }, + { + description: "unsupported constraint value", + constraint: "num_replicas=5 num_voters=1 voter_constraints={'+region=CA':1}", + expectedSuccess: false, + expectedErrorMsgStr: "region constraint value CA is not found in the cluster set up", + }, + { + description: "unsupported constraint value", + constraint: "num_replicas=5 constraints={'+zone=CA':1}", + expectedSuccess: false, + expectedErrorMsgStr: "zone constraint value CA is not found in the cluster set up", + }, + { + description: "unsupported constraint type", + constraint: "num_replicas=5 constraints={'-region=US_West':1}", + expectedSuccess: false, + expectedErrorMsgStr: "constraints marked as 
Constraint_PROHIBITED are unsupported", + }, + } + for _, tc := range testCases { + t.Run(tc.description, func(t *testing.T) { + ma := newMockAllocator(zoneToRegion, zone, region, total) + config := spanconfigtestutils.ParseZoneConfig(t, tc.constraint).AsSpanConfig() + success, actualError := ma.isSatisfiable(config) + require.Equal(t, tc.expectedSuccess, success) + if tc.expectedErrorMsgStr == "" { + require.Nil(t, actualError) + } else { + require.EqualError(t, actualError, tc.expectedErrorMsgStr) + } + }) + } +}