diff --git a/ci/pingcap_tidb_operator_build_kind.groovy b/ci/pingcap_tidb_operator_build_kind.groovy index 4b8baf5b0b..6ff759fa60 100644 --- a/ci/pingcap_tidb_operator_build_kind.groovy +++ b/ci/pingcap_tidb_operator_build_kind.groovy @@ -238,13 +238,13 @@ def call(BUILD_BRANCH, CREDENTIALS_ID, CODECOV_CREDENTIALS_ID) { def MIRRORS = "DOCKER_IO_MIRROR=http://172.16.4.143:5000 QUAY_IO_MIRROR=http://172.16.4.143:5001" def builds = [:] builds["E2E v1.12.10"] = { - build("${MIRRORS} RUNNER_SUITE_NAME=e2e-v1.12 IMAGE_TAG=${GITHASH} SKIP_BUILD=y GINKGO_NODES=6 KUBE_VERSION=v1.12.10 REPORT_DIR=\$(pwd)/artifacts REPORT_PREFIX=v1.12.10_ ./hack/e2e.sh -- --preload-images", artifacts) + build("${MIRRORS} RUNNER_SUITE_NAME=e2e-v1.12 IMAGE_TAG=${GITHASH} SKIP_BUILD=y GINKGO_NODES=6 KUBE_VERSION=v1.12.10 REPORT_DIR=\$(pwd)/artifacts REPORT_PREFIX=v1.12.10_ ./hack/e2e.sh -- --preload-images --operator-killer", artifacts) } builds["E2E v1.12.10 AdvancedStatefulSet"] = { build("${MIRRORS} RUNNER_SUITE_NAME=e2e-v1.12-advanced-statefulset IMAGE_TAG=${GITHASH} SKIP_BUILD=y GINKGO_NODES=6 KUBE_VERSION=v1.12.10 REPORT_DIR=\$(pwd)/artifacts REPORT_PREFIX=v1.12.10_advanced_statefulset ./hack/e2e.sh -- --preload-images --operator-features AdvancedStatefulSet=true", artifacts) } builds["E2E v1.18.0"] = { - build("${MIRRORS} RUNNER_SUITE_NAME=e2e-v1.18 IMAGE_TAG=${GITHASH} SKIP_BUILD=y GINKGO_NODES=6 KUBE_VERSION=v1.18.0 REPORT_DIR=\$(pwd)/artifacts REPORT_PREFIX=v1.18.0_ ./hack/e2e.sh -- -preload-images", artifacts) + build("${MIRRORS} RUNNER_SUITE_NAME=e2e-v1.18 IMAGE_TAG=${GITHASH} SKIP_BUILD=y GINKGO_NODES=6 KUBE_VERSION=v1.18.0 REPORT_DIR=\$(pwd)/artifacts REPORT_PREFIX=v1.18.0_ ./hack/e2e.sh -- -preload-images --operator-killer", artifacts) } builds["E2E v1.12.10 Serial"] = { build("${MIRRORS} RUNNER_SUITE_NAME=e2e-v1.12-serial IMAGE_TAG=${GITHASH} SKIP_BUILD=y KUBE_VERSION=v1.12.10 REPORT_DIR=\$(pwd)/artifacts REPORT_PREFIX=v1.12.10_serial_ ./hack/e2e.sh -- --preload-images 
--ginkgo.focus='\\[Serial\\]' --install-operator=false", artifacts) diff --git a/tests/config.go b/tests/config.go index 5cc4875e11..9dd70dd8b6 100644 --- a/tests/config.go +++ b/tests/config.go @@ -21,10 +21,9 @@ import ( "os" "strings" - "github.com/pingcap/tidb-operator/tests/slack" - + utiloperator "github.com/pingcap/tidb-operator/tests/e2e/util/operator" "github.com/pingcap/tidb-operator/tests/pkg/blockwriter" - + "github.com/pingcap/tidb-operator/tests/slack" "gopkg.in/yaml.v2" "k8s.io/klog" ) @@ -77,6 +76,8 @@ type Config struct { E2EImage string `yaml:"e2e_image" json:"e2e_image"` PreloadImages bool `yaml:"preload_images" json:"preload_images"` + + OperatorKiller utiloperator.OperatorKillerConfig } // Nodes defines a series of nodes that belong to the same physical node. diff --git a/tests/e2e/config/config.go b/tests/e2e/config/config.go index 73cf85028c..a037d122aa 100644 --- a/tests/e2e/config/config.go +++ b/tests/e2e/config/config.go @@ -17,6 +17,7 @@ import ( "flag" "fmt" "io/ioutil" + "time" "github.com/pingcap/tidb-operator/tests" v1 "k8s.io/api/core/v1" @@ -46,6 +47,9 @@ func RegisterTiDBOperatorFlags(flags *flag.FlagSet) { flags.StringVar(&TestConfig.ChartDir, "chart-dir", "", "chart dir") flags.BoolVar(&TestConfig.PreloadImages, "preload-images", false, "if set, preload images in the bootstrap of e2e process") flags.StringVar(&TestConfig.BackupImage, "backup-image", "", "backup image") + flags.BoolVar(&TestConfig.OperatorKiller.Enabled, "operator-killer", false, "whether to enable operator kill") + flags.DurationVar(&TestConfig.OperatorKiller.Interval, "operator-killer-interval", 5*time.Minute, "interval between operator kills") + flags.Float64Var(&TestConfig.OperatorKiller.JitterFactor, "operator-killer-jitter-factor", 1, "factor used to jitter operator kills") } func AfterReadingAllFlags() error { diff --git a/tests/e2e/e2e.go b/tests/e2e/e2e.go index 7df4fccf10..9991599cdc 100644 --- a/tests/e2e/e2e.go +++ b/tests/e2e/e2e.go @@ -35,10 +35,12 
@@ import (
 	e2econfig "github.com/pingcap/tidb-operator/tests/e2e/config"
 	utilimage "github.com/pingcap/tidb-operator/tests/e2e/util/image"
 	utilnode "github.com/pingcap/tidb-operator/tests/e2e/util/node"
+	utiloperator "github.com/pingcap/tidb-operator/tests/e2e/util/operator"
 	v1 "k8s.io/api/core/v1"
 	storagev1 "k8s.io/api/storage/v1"
 	apiextensionsclientset "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/labels"
 	runtimeutils "k8s.io/apimachinery/pkg/util/runtime"
 	"k8s.io/apimachinery/pkg/util/wait"
 	"k8s.io/client-go/kubernetes"
@@ -58,6 +60,10 @@ import (
 	_ "k8s.io/kubernetes/test/e2e/framework/providers/gce"
 )
 
+var (
+	operatorKillerStopCh chan struct{}
+)
+
 // This is modified from framework.SetupSuite().
 // setupSuite is the boilerplate that can be used to setup ginkgo test suites, on the SynchronizedBeforeSuite step.
 // There are certain operations we only want to run once per overall test invocation
@@ -256,6 +262,23 @@ var _ = ginkgo.SynchronizedBeforeSuite(func() []byte {
 		ginkgo.By("Installing tidb-operator")
 		oa.CleanOperatorOrDie(ocfg)
 		oa.DeployOperatorOrDie(ocfg)
+		if e2econfig.TestConfig.OperatorKiller.Enabled {
+			operatorKiller := utiloperator.NewOperatorKiller(e2econfig.TestConfig.OperatorKiller, kubeCli, func() ([]v1.Pod, error) {
+				podList, err := kubeCli.CoreV1().Pods(ocfg.Namespace).List(metav1.ListOptions{
+					LabelSelector: labels.SelectorFromSet(map[string]string{
+						"app.kubernetes.io/name": "tidb-operator",
+					}).String(),
+				})
+				if err != nil {
+					return nil, err
+				}
+				return podList.Items, nil
+			})
+			// Assign the package-level channel with "=" (":=" would declare a
+			// new local one) so that SynchronizedAfterSuite can close it.
+			operatorKillerStopCh = make(chan struct{})
+			go operatorKiller.Run(operatorKillerStopCh)
+		}
 	} else {
 		ginkgo.By("Skip installing tidb-operator")
 	}
@@ -269,6 +292,9 @@ var _ = ginkgo.SynchronizedAfterSuite(func() {
 	framework.CleanupSuite()
 }, func() {
 	framework.AfterSuiteActions()
+	if operatorKillerStopCh != nil {
+		close(operatorKillerStopCh)
+	}
 })
 
 // 
RunE2ETests checks configuration parameters (specified through flags) and then runs
diff --git a/tests/e2e/util/operator/operator.go b/tests/e2e/util/operator/operator.go
new file mode 100644
index 0000000000..58d0b87188
--- /dev/null
+++ b/tests/e2e/util/operator/operator.go
@@ -0,0 +1,83 @@
+// Copyright 2020 PingCAP, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package operator
+
+import (
+	"time"
+
+	v1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/util/wait"
+	"k8s.io/client-go/kubernetes"
+	"k8s.io/kubernetes/test/e2e/framework"
+)
+
+// OperatorKillerConfig describes configuration for operator killer.
+type OperatorKillerConfig struct {
+	// Enabled is a switch for callers deciding whether to start the killer;
+	// nothing in this file reads it.
+	Enabled bool
+	// Interval is time between operator failures.
+	Interval time.Duration
+	// Operator pods will be deleted between [Interval, Interval * (1.0 + JitterFactor)].
+	JitterFactor float64
+}
+
+// OperatorKiller deletes pods of tidb-operator to simulate operator failures.
+// The pods to delete are whatever podLister returns on each round.
+type OperatorKiller struct {
+	config    OperatorKillerConfig
+	client    kubernetes.Interface
+	podLister func() ([]v1.Pod, error)
+}
+
+// NewOperatorKiller creates a new operator killer that deletes, through client,
+// the pods returned by podLister on the schedule described by config.
+func NewOperatorKiller(config OperatorKillerConfig, client kubernetes.Interface, podLister func() ([]v1.Pod, error)) *OperatorKiller {
+	return &OperatorKiller{
+		config:    config,
+		client:    client,
+		podLister: podLister,
+	}
+}
+
+// Run deletes the pods returned by podLister once per jittered interval until
+// stopCh is closed.
+func (k *OperatorKiller) Run(stopCh <-chan struct{}) {
+	// wait.JitterUntil starts work immediately, so wait first. A timer is used
+	// instead of time.Sleep so that closing stopCh during this initial delay
+	// ends Run right away rather than after the delay has elapsed.
+	timer := time.NewTimer(wait.Jitter(k.config.Interval, k.config.JitterFactor))
+	select {
+	case <-stopCh:
+		timer.Stop()
+		return
+	case <-timer.C:
+	}
+	wait.JitterUntil(func() {
+		pods, err := k.podLister()
+		if err != nil {
+			framework.Logf("failed to list operator pods: %v", err)
+			return
+		}
+		for _, pod := range pods {
+			// A failed delete is only logged so that the remaining pods are still deleted.
+			if err = k.client.CoreV1().Pods(pod.Namespace).Delete(pod.Name, &metav1.DeleteOptions{}); err != nil {
+				framework.Logf("failed to delete pod %s/%s: %v", pod.Namespace, pod.Name, err)
+				continue
+			}
+			framework.Logf("successfully deleted tidb-operator pod %s/%s", pod.Namespace, pod.Name)
+		}
+	}, k.config.Interval, k.config.JitterFactor, true, stopCh)
+}