diff --git a/tests/robustness/README.md b/tests/robustness/README.md index 7859a63308d..98e3c7e4e6f 100644 --- a/tests/robustness/README.md +++ b/tests/robustness/README.md @@ -51,15 +51,15 @@ Errors in etcd model could be causing false positives, which makes the ability t * **For remote runs on CI:** you need to go to the [Prow Dashboard](https://prow.k8s.io/job-history/gs/kubernetes-jenkins/logs/ci-etcd-robustness-amd64), go to a build, download one of the Artifacts (`artifacts/results.zip`), and extract it locally. - ![Prow job run page](./prow_job.png) + ![Prow job run page](readme-images/prow_job.png) - ![Prow job artifacts run page](./prow_job_artifacts_page.png) + ![Prow job artifacts run page](readme-images/prow_job_artifacts_page.png) - ![Prow job artifacts run page artifacts dir](./prow_job_artifacts_dir_page.png) + ![Prow job artifacts run page artifacts dir](readme-images/prow_job_artifacts_dir_page.png) Each directory will be prefixed by `TestRobustness` each containing a robustness test report. - ![artifact archive](./artifact_archive.png) + ![artifact archive](readme-images/artifact_archive.png) Pick one of the directories within the archive corresponding to the failed test scenario. The largest directory by size usually corresponds to the failed scenario. @@ -134,7 +134,7 @@ Open `/tmp/TestRobustnessRegression_Issue14370/1715157774429416550/history.html` Jump to the error in linearization by clicking `[ jump to first error ]` on the top of the page. You should see a graph similar to the one on the image below. -![issue14370](./issue14370.png) +![issue14370](readme-images/issue14370.png) Last correct request (connected with grey line) is a `Put` request that succeeded and got revision `168`. All following requests are invalid (connected with red line) as they have revision `167`. diff --git a/tests/robustness/watch.go b/tests/robustness/client/watch.go similarity index 85% rename from tests/robustness/watch.go rename to tests/robustness/client/watch.go index 3da853f0078..a22afa22193 100644 --- a/tests/robustness/watch.go +++ b/tests/robustness/client/watch.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package robustness +package client import ( "context" @@ -21,25 +21,24 @@ import ( "time" "go.etcd.io/etcd/tests/v3/framework/e2e" - "go.etcd.io/etcd/tests/v3/robustness/client" "go.etcd.io/etcd/tests/v3/robustness/identity" "go.etcd.io/etcd/tests/v3/robustness/report" ) -func collectClusterWatchEvents(ctx context.Context, t *testing.T, clus *e2e.EtcdProcessCluster, maxRevisionChan <-chan int64, cfg watchConfig, baseTime time.Time, ids identity.Provider) []report.ClientReport { +func CollectClusterWatchEvents(ctx context.Context, t *testing.T, clus *e2e.EtcdProcessCluster, maxRevisionChan <-chan int64, cfg WatchConfig, baseTime time.Time, ids identity.Provider) []report.ClientReport { mux := sync.Mutex{} var wg sync.WaitGroup reports := make([]report.ClientReport, len(clus.Procs)) memberMaxRevisionChans := make([]chan int64, len(clus.Procs)) for i, member := range clus.Procs { - c, err := client.NewRecordingClient(member.EndpointsGRPC(), ids, baseTime) + c, err := NewRecordingClient(member.EndpointsGRPC(), ids, baseTime) if err != nil { t.Fatal(err) } memberMaxRevisionChan := make(chan int64, 1) memberMaxRevisionChans[i] = memberMaxRevisionChan wg.Add(1) - go func(i int, c *client.RecordingClient) { + go func(i int, c *RecordingClient) { defer wg.Done() defer c.Close() watchUntilRevision(ctx, t, c, memberMaxRevisionChan, cfg) @@ -60,12 +59,12 @@ func collectClusterWatchEvents(ctx context.Context, t *testing.T, clus *e2e.Etcd return reports } -type watchConfig struct { - requestProgress bool +type WatchConfig struct { + RequestProgress bool } // watchUntilRevision watches all changes until context is cancelled, it has observed revision provided via maxRevisionChan or maxRevisionChan was closed. -func watchUntilRevision(ctx context.Context, t *testing.T, c *client.RecordingClient, maxRevisionChan <-chan int64, cfg watchConfig) { +func watchUntilRevision(ctx context.Context, t *testing.T, c *RecordingClient, maxRevisionChan <-chan int64, cfg WatchConfig) { var maxRevision int64 var lastRevision int64 = 1 ctx, cancel := context.WithCancel(ctx) @@ -100,7 +99,7 @@ resetWatch: t.Logf("Watch channel closed") continue resetWatch } - if cfg.requestProgress { + if cfg.RequestProgress { c.RequestProgress(ctx) } @@ -124,7 +123,7 @@ resetWatch: } } -func validateGotAtLeastOneProgressNotify(t *testing.T, reports []report.ClientReport, expectProgressNotify bool) { +func ValidateGotAtLeastOneProgressNotify(t *testing.T, reports []report.ClientReport, expectProgressNotify bool) { var gotProgressNotify = false external: for _, r := range reports { diff --git a/tests/robustness/main_test.go b/tests/robustness/main_test.go index 63ba6b37763..40c051f862c 100644 --- a/tests/robustness/main_test.go +++ b/tests/robustness/main_test.go @@ -26,10 +26,12 @@ import ( "go.etcd.io/etcd/tests/v3/framework" "go.etcd.io/etcd/tests/v3/framework/e2e" + "go.etcd.io/etcd/tests/v3/robustness/client" "go.etcd.io/etcd/tests/v3/robustness/failpoint" "go.etcd.io/etcd/tests/v3/robustness/identity" "go.etcd.io/etcd/tests/v3/robustness/model" "go.etcd.io/etcd/tests/v3/robustness/report" + "go.etcd.io/etcd/tests/v3/robustness/scenarios" "go.etcd.io/etcd/tests/v3/robustness/traffic" "go.etcd.io/etcd/tests/v3/robustness/validate" ) @@ -43,21 +45,21 @@ func TestMain(m *testing.M) { func TestRobustnessExploratory(t *testing.T) { testRunner.BeforeTest(t) - for _, s := range exploratoryScenarios(t) { - t.Run(s.name, func(t *testing.T) { + for _, s := range scenarios.Exploratory(t) { + t.Run(s.Name, func(t *testing.T) { lg := zaptest.NewLogger(t) - s.cluster.Logger = lg + s.Cluster.Logger = lg ctx := context.Background() - c, err := e2e.NewEtcdProcessCluster(ctx, t, e2e.WithConfig(&s.cluster)) + c, err := e2e.NewEtcdProcessCluster(ctx, t, e2e.WithConfig(&s.Cluster)) if err != nil { t.Fatal(err) } defer forcestopCluster(c) - s.failpoint, err = failpoint.PickRandom(c, s.profile) + s.Failpoint, err = failpoint.PickRandom(c, s.Profile) if err != nil { t.Fatal(err) } - t.Run(s.failpoint.Name(), func(t *testing.T) { + t.Run(s.Failpoint.Name(), func(t *testing.T) { testRobustness(ctx, t, lg, s, c) }) }) @@ -66,12 +68,12 @@ func TestRobustnessExploratory(t *testing.T) { func TestRobustnessRegression(t *testing.T) { testRunner.BeforeTest(t) - for _, s := range regressionScenarios(t) { - t.Run(s.name, func(t *testing.T) { + for _, s := range scenarios.Regression(t) { + t.Run(s.Name, func(t *testing.T) { lg := zaptest.NewLogger(t) - s.cluster.Logger = lg + s.Cluster.Logger = lg ctx := context.Background() - c, err := e2e.NewEtcdProcessCluster(ctx, t, e2e.WithConfig(&s.cluster)) + c, err := e2e.NewEtcdProcessCluster(ctx, t, e2e.WithConfig(&s.Cluster)) if err != nil { t.Fatal(err) } @@ -81,7 +83,7 @@ func TestRobustnessRegression(t *testing.T) { } } -func testRobustness(ctx context.Context, t *testing.T, lg *zap.Logger, s testScenario, c *e2e.EtcdProcessCluster) { +func testRobustness(ctx context.Context, t *testing.T, lg *zap.Logger, s scenarios.TestScenario, c *e2e.EtcdProcessCluster) { r := report.TestReport{Logger: lg, Cluster: c} // t.Failed() returns false during panicking. We need to forcibly // save data on panicking. @@ -90,24 +92,24 @@ func testRobustness(ctx context.Context, t *testing.T, lg *zap.Logger, s testSce defer func() { r.Report(t, panicked) }() - r.Client = s.run(ctx, t, lg, c) + r.Client = runScenario(ctx, t, s, lg, c) persistedRequests, err := report.PersistedRequestsCluster(lg, c) if err != nil { t.Fatal(err) } - failpointImpactingWatch := s.failpoint == failpoint.SleepBeforeSendWatchResponse + failpointImpactingWatch := s.Failpoint == failpoint.SleepBeforeSendWatchResponse if !failpointImpactingWatch { watchProgressNotifyEnabled := c.Cfg.ServerConfig.ExperimentalWatchProgressNotifyInterval != 0 - validateGotAtLeastOneProgressNotify(t, r.Client, s.watch.requestProgress || watchProgressNotifyEnabled) + client.ValidateGotAtLeastOneProgressNotify(t, r.Client, s.Watch.RequestProgress || watchProgressNotifyEnabled) } - validateConfig := validate.Config{ExpectRevisionUnique: s.traffic.ExpectUniqueRevision()} + validateConfig := validate.Config{ExpectRevisionUnique: s.Traffic.ExpectUniqueRevision()} r.Visualize = validate.ValidateAndReturnVisualize(t, lg, validateConfig, r.Client, persistedRequests, 5*time.Minute) panicked = false } -func (s testScenario) run(ctx context.Context, t *testing.T, lg *zap.Logger, clus *e2e.EtcdProcessCluster) (reports []report.ClientReport) { +func runScenario(ctx context.Context, t *testing.T, s scenarios.TestScenario, lg *zap.Logger, clus *e2e.EtcdProcessCluster) (reports []report.ClientReport) { ctx, cancel := context.WithCancel(ctx) defer cancel() g := errgroup.Group{} @@ -122,7 +124,7 @@ func (s testScenario) run(ctx context.Context, t *testing.T, lg *zap.Logger, clu defer close(failpointInjected) // Give some time for traffic to reach qps target before injecting failpoint. time.Sleep(time.Second) - fr, err := failpoint.Inject(ctx, t, lg, clus, s.failpoint, baseTime, ids) + fr, err := failpoint.Inject(ctx, t, lg, clus, s.Failpoint, baseTime, ids) if err != nil { t.Error(err) cancel() @@ -138,14 +140,14 @@ func (s testScenario) run(ctx context.Context, t *testing.T, lg *zap.Logger, clu maxRevisionChan := make(chan int64, 1) g.Go(func() error { defer close(maxRevisionChan) - operationReport = traffic.SimulateTraffic(ctx, t, lg, clus, s.profile, s.traffic, failpointInjected, baseTime, ids) + operationReport = traffic.SimulateTraffic(ctx, t, lg, clus, s.Profile, s.Traffic, failpointInjected, baseTime, ids) maxRevision := operationsMaxRevision(operationReport) maxRevisionChan <- maxRevision - lg.Info("Finished simulating traffic", zap.Int64("max-revision", maxRevision)) + lg.Info("Finished simulating Traffic", zap.Int64("max-revision", maxRevision)) return nil }) g.Go(func() error { - watchReport = collectClusterWatchEvents(ctx, t, clus, maxRevisionChan, s.watch, baseTime, ids) + watchReport = client.CollectClusterWatchEvents(ctx, t, clus, maxRevisionChan, s.Watch, baseTime, ids) return nil }) g.Wait() diff --git a/tests/robustness/artifact_archive.png b/tests/robustness/readme-images/artifact_archive.png similarity index 100% rename from tests/robustness/artifact_archive.png rename to tests/robustness/readme-images/artifact_archive.png diff --git a/tests/robustness/issue14370.png b/tests/robustness/readme-images/issue14370.png similarity index 100% rename from tests/robustness/issue14370.png rename to tests/robustness/readme-images/issue14370.png diff --git a/tests/robustness/prow_job.png b/tests/robustness/readme-images/prow_job.png similarity index 100% rename from tests/robustness/prow_job.png rename to tests/robustness/readme-images/prow_job.png diff --git a/tests/robustness/prow_job_artifacts_dir_page.png b/tests/robustness/readme-images/prow_job_artifacts_dir_page.png similarity index 100% rename from tests/robustness/prow_job_artifacts_dir_page.png rename to tests/robustness/readme-images/prow_job_artifacts_dir_page.png diff --git a/tests/robustness/prow_job_artifacts_page.png b/tests/robustness/readme-images/prow_job_artifacts_page.png similarity index 100% rename from tests/robustness/prow_job_artifacts_page.png rename to tests/robustness/readme-images/prow_job_artifacts_page.png diff --git a/tests/robustness/scenarios.go b/tests/robustness/scenarios/scenarios.go similarity index 68% rename from tests/robustness/scenarios.go rename to tests/robustness/scenarios/scenarios.go index af06660f894..c6bf56a1c1a 100644 --- a/tests/robustness/scenarios.go +++ b/tests/robustness/scenarios/scenarios.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package robustness +package scenarios import ( "path/filepath" @@ -22,6 +22,7 @@ import ( "go.etcd.io/etcd/api/v3/version" "go.etcd.io/etcd/client/pkg/v3/fileutil" "go.etcd.io/etcd/tests/v3/framework/e2e" + "go.etcd.io/etcd/tests/v3/robustness/client" "go.etcd.io/etcd/tests/v3/robustness/failpoint" "go.etcd.io/etcd/tests/v3/robustness/options" "go.etcd.io/etcd/tests/v3/robustness/random" @@ -57,16 +58,16 @@ var trafficProfiles = []TrafficProfile{ }, } -type testScenario struct { - name string - failpoint failpoint.Failpoint - cluster e2e.EtcdProcessClusterConfig - traffic traffic.Traffic - profile traffic.Profile - watch watchConfig +type TestScenario struct { + Name string + Failpoint failpoint.Failpoint + Cluster e2e.EtcdProcessClusterConfig + Traffic traffic.Traffic + Profile traffic.Profile + Watch client.WatchConfig } -func exploratoryScenarios(_ *testing.T) []testScenario { +func Exploratory(_ *testing.T) []TestScenario { randomizableOptions := []e2e.EPClusterOption{ options.WithClusterOptionGroups( options.ClusterOptions{options.WithTickMs(29), options.WithElectionMs(271)}, @@ -100,16 +101,16 @@ func exploratoryScenarios(_ *testing.T) []testScenario { if e2e.CouldSetSnapshotCatchupEntries(e2e.BinPath.Etcd) { baseOptions = append(baseOptions, e2e.WithSnapshotCatchUpEntries(100)) } - scenarios := []testScenario{} + scenarios := []TestScenario{} for _, tp := range trafficProfiles { name := filepath.Join(tp.Name, "ClusterOfSize1") clusterOfSize1Options := baseOptions clusterOfSize1Options = append(clusterOfSize1Options, e2e.WithClusterSize(1)) - scenarios = append(scenarios, testScenario{ - name: name, - traffic: tp.Traffic, - profile: tp.Profile, - cluster: *e2e.NewConfig(clusterOfSize1Options...), + scenarios = append(scenarios, TestScenario{ + Name: name, + Traffic: tp.Traffic, + Profile: tp.Profile, + Cluster: *e2e.NewConfig(clusterOfSize1Options...), }) } @@ -121,27 +122,27 @@ func exploratoryScenarios(_ *testing.T) []testScenario { if fileutil.Exist(e2e.BinPath.EtcdLastRelease) { clusterOfSize3Options = append(clusterOfSize3Options, mixedVersionOption) } - scenarios = append(scenarios, testScenario{ - name: name, - traffic: tp.Traffic, - profile: tp.Profile, - cluster: *e2e.NewConfig(clusterOfSize3Options...), + scenarios = append(scenarios, TestScenario{ + Name: name, + Traffic: tp.Traffic, + Profile: tp.Profile, + Cluster: *e2e.NewConfig(clusterOfSize3Options...), }) } if e2e.BinPath.LazyFSAvailable() { newScenarios := scenarios for _, s := range scenarios { // LazyFS increases the load on CPU, so we run it with more lightweight case. - if s.profile.MinimalQPS <= 100 && s.cluster.ClusterSize == 1 { - lazyfsCluster := s.cluster + if s.Profile.MinimalQPS <= 100 && s.Cluster.ClusterSize == 1 { + lazyfsCluster := s.Cluster lazyfsCluster.LazyFSEnabled = true - newScenarios = append(newScenarios, testScenario{ - name: filepath.Join(s.name, "LazyFS"), - failpoint: s.failpoint, - cluster: lazyfsCluster, - traffic: s.traffic, - profile: s.profile.WithoutCompaction(), - watch: s.watch, + newScenarios = append(newScenarios, TestScenario{ + Name: filepath.Join(s.Name, "LazyFS"), + Failpoint: s.Failpoint, + Cluster: lazyfsCluster, + Traffic: s.Traffic, + Profile: s.Profile.WithoutCompaction(), + Watch: s.Watch, }) } } @@ -150,60 +151,60 @@ func exploratoryScenarios(_ *testing.T) []testScenario { return scenarios } -func regressionScenarios(t *testing.T) []testScenario { +func Regression(t *testing.T) []TestScenario { v, err := e2e.GetVersionFromBinary(e2e.BinPath.Etcd) if err != nil { t.Fatalf("Failed checking etcd version binary, binary: %q, err: %v", e2e.BinPath.Etcd, err) } - scenarios := []testScenario{} - scenarios = append(scenarios, testScenario{ - name: "Issue14370", - failpoint: failpoint.RaftBeforeSavePanic, - profile: traffic.LowTraffic, - traffic: traffic.EtcdPutDeleteLease, - cluster: *e2e.NewConfig( + scenarios := []TestScenario{} + scenarios = append(scenarios, TestScenario{ + Name: "Issue14370", + Failpoint: failpoint.RaftBeforeSavePanic, + Profile: traffic.LowTraffic, + Traffic: traffic.EtcdPutDeleteLease, + Cluster: *e2e.NewConfig( e2e.WithClusterSize(1), e2e.WithGoFailEnabled(true), ), }) - scenarios = append(scenarios, testScenario{ - name: "Issue14685", - failpoint: failpoint.DefragBeforeCopyPanic, - profile: traffic.LowTraffic, - traffic: traffic.EtcdPutDeleteLease, - cluster: *e2e.NewConfig( + scenarios = append(scenarios, TestScenario{ + Name: "Issue14685", + Failpoint: failpoint.DefragBeforeCopyPanic, + Profile: traffic.LowTraffic, + Traffic: traffic.EtcdPutDeleteLease, + Cluster: *e2e.NewConfig( e2e.WithClusterSize(1), e2e.WithGoFailEnabled(true), ), }) - scenarios = append(scenarios, testScenario{ - name: "Issue13766", - failpoint: failpoint.KillFailpoint, - profile: traffic.HighTrafficProfile, - traffic: traffic.EtcdPut, - cluster: *e2e.NewConfig( + scenarios = append(scenarios, TestScenario{ + Name: "Issue13766", + Failpoint: failpoint.KillFailpoint, + Profile: traffic.HighTrafficProfile, + Traffic: traffic.EtcdPut, + Cluster: *e2e.NewConfig( e2e.WithSnapshotCount(100), ), }) - scenarios = append(scenarios, testScenario{ - name: "Issue15220", - watch: watchConfig{ - requestProgress: true, + scenarios = append(scenarios, TestScenario{ + Name: "Issue15220", + Watch: client.WatchConfig{ + RequestProgress: true, }, - profile: traffic.LowTraffic, - traffic: traffic.EtcdPutDeleteLease, - failpoint: failpoint.KillFailpoint, - cluster: *e2e.NewConfig( + Profile: traffic.LowTraffic, + Traffic: traffic.EtcdPutDeleteLease, + Failpoint: failpoint.KillFailpoint, + Cluster: *e2e.NewConfig( e2e.WithClusterSize(1), ), }) - scenarios = append(scenarios, testScenario{ - name: "Issue17529", - profile: traffic.HighTrafficProfile, - traffic: traffic.Kubernetes, - failpoint: failpoint.SleepBeforeSendWatchResponse, - cluster: *e2e.NewConfig( + scenarios = append(scenarios, TestScenario{ + Name: "Issue17529", + Profile: traffic.HighTrafficProfile, + Traffic: traffic.Kubernetes, + Failpoint: failpoint.SleepBeforeSendWatchResponse, + Cluster: *e2e.NewConfig( e2e.WithClusterSize(1), e2e.WithGoFailEnabled(true), options.WithSnapshotCount(100), @@ -218,12 +219,12 @@ func regressionScenarios(t *testing.T) []testScenario { if e2e.CouldSetSnapshotCatchupEntries(e2e.BinPath.Etcd) { opts = append(opts, e2e.WithSnapshotCatchUpEntries(100)) } - scenarios = append(scenarios, testScenario{ - name: "Issue15271", - failpoint: failpoint.BlackholeUntilSnapshot, - profile: traffic.HighTrafficProfile, - traffic: traffic.EtcdPut, - cluster: *e2e.NewConfig(opts...), + scenarios = append(scenarios, TestScenario{ + Name: "Issue15271", + Failpoint: failpoint.BlackholeUntilSnapshot, + Profile: traffic.HighTrafficProfile, + Traffic: traffic.EtcdPut, + Cluster: *e2e.NewConfig(opts...), }) } return scenarios