Skip to content

Commit

Permalink
Try to stabilise tests by checking for cluster health early (#7184)
Browse files Browse the repository at this point in the history
Add an immediate health check after creation to delay subsequent k8s resource creations to a point when ES is available. The main motivation is test stability in cases where Beats would send data immediately without waiting for proper ES initialisation see #7172
Add an opt-out mechanism for tests where we know the health check cannot succeed. 
TODO: it is unclear at this point why the remote cluster test fails if we wait for cluster 1 to come up first. I will create a follow up issue
  • Loading branch information
pebrc authored Sep 27, 2023
1 parent f080020 commit 800435a
Show file tree
Hide file tree
Showing 5 changed files with 24 additions and 6 deletions.
9 changes: 6 additions & 3 deletions test/e2e/es/forced_upgrade_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@ import (
func TestForceUpgradePendingPods(t *testing.T) {
// create a cluster whose Pods will stay Pending forever
initial := elasticsearch.NewBuilder("force-upgrade-pending").
WithESMasterDataNodes(3, elasticsearch.DefaultResources)
WithESMasterDataNodes(3, elasticsearch.DefaultResources).
SkipImmediateHealthCheck()
initial.Elasticsearch.Spec.NodeSets[0].PodTemplate.Spec.NodeSelector = map[string]string{
"cannot": "be-scheduled",
}
Expand Down Expand Up @@ -55,7 +56,8 @@ func TestForceUpgradePendingPodsInOneStatefulSet(t *testing.T) {
Name: "pending",
Count: 1,
PodTemplate: elasticsearch.ESPodTemplate(elasticsearch.DefaultResources),
})
}).
SkipImmediateHealthCheck()

// make Pods of the 2nds NodeSet pending
initial.Elasticsearch.Spec.NodeSets[1].PodTemplate.Spec.NodeSelector = map[string]string{
Expand Down Expand Up @@ -121,7 +123,8 @@ func TestForceUpgradeBootloopingPods(t *testing.T) {
"masterdata": {
"this leads": "to a bootlooping instance",
},
})
}).
SkipImmediateHealthCheck()

// fix that cluster to remove the wrong configuration
fixed := initial.WithNoESTopology().WithESMasterDataNodes(3, elasticsearch.DefaultResources)
Expand Down
5 changes: 4 additions & 1 deletion test/e2e/es/remote_cluster_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,10 @@ func TestRemoteCluster(t *testing.T) {
es1Builder := elasticsearch.NewBuilder(name).
WithNamespace(ns1).
WithESMasterDataNodes(1, elasticsearch.DefaultResources).
WithRestrictedSecurityContext()
WithRestrictedSecurityContext().
// TODO figure out why this is necessary
SkipImmediateHealthCheck()

es1LicenseTestContext := elasticsearch.NewLicenseTestContext(test.NewK8sClientOrFatal(), es1Builder.Elasticsearch)

ns2 := test.Ctx().ManagedNamespace(1)
Expand Down
6 changes: 6 additions & 0 deletions test/e2e/test/elasticsearch/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ type Builder struct {
GlobalCA bool

mutationToleratedChecksFailureCount int
skipImmediateHealthCheck bool
}

func (b Builder) DeepCopy() *Builder {
Expand Down Expand Up @@ -111,6 +112,11 @@ func newBuilder(name, randSuffix string) Builder {
WithLabel(run.TestNameLabel, name)
}

func (b Builder) SkipImmediateHealthCheck() Builder {
b.skipImmediateHealthCheck = true
return b
}

func (b Builder) WithAnnotation(key, value string) Builder {
if b.Elasticsearch.ObjectMeta.Annotations == nil {
b.Elasticsearch.ObjectMeta.Annotations = make(map[string]string)
Expand Down
8 changes: 6 additions & 2 deletions test/e2e/test/elasticsearch/steps_creation.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,12 @@ func (b Builder) CreationTestSteps(k *test.K8sClient) test.StepList {
if b.Elasticsearch.Spec.Version != createdEs.Spec.Version {
return fmt.Errorf("expected version %s but got %s", b.Elasticsearch.Spec.Version, createdEs.Spec.Version)
}
// TODO this is incomplete
return nil
if b.skipImmediateHealthCheck {
return nil
}
// check cluster health here already during the create steps to make sure any dependent builders can rely on a ready ES
// this is special casing Elasticsearch a little bit in the interest of test stability see https://github.com/elastic/cloud-on-k8s/issues/7172
return clusterHealthIs(b, k, esv1.ElasticsearchGreenHealth)
}),
},
})
Expand Down
2 changes: 2 additions & 0 deletions test/e2e/test/helper/yaml.go
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,8 @@ func (yd *YAMLDecoder) ToBuilders(reader *bufio.Reader, transform BuilderTransfo
builders = append(builders, builder)
}

// sort to make sure the ES builders come first to minimise the chance of test failures as in https://github.com/elastic/cloud-on-k8s/issues/7172
sortBuilders(builders)
return builders, nil
}

Expand Down

0 comments on commit 800435a

Please sign in to comment.