Skip to content

Commit

Permalink
kill tidb-operator pods randomly in e2e (#2125)
Browse files Browse the repository at this point in the history
* kill tidb-operator pods randomly in e2e

* don't use channel in configuration struct

* add successful log
  • Loading branch information
cofyc authored Apr 8, 2020
1 parent e72b21f commit 7d49f55
Show file tree
Hide file tree
Showing 5 changed files with 104 additions and 5 deletions.
4 changes: 2 additions & 2 deletions ci/pingcap_tidb_operator_build_kind.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -238,13 +238,13 @@ def call(BUILD_BRANCH, CREDENTIALS_ID, CODECOV_CREDENTIALS_ID) {
def MIRRORS = "DOCKER_IO_MIRROR=http://172.16.4.143:5000 QUAY_IO_MIRROR=http://172.16.4.143:5001"
def builds = [:]
builds["E2E v1.12.10"] = {
build("${MIRRORS} RUNNER_SUITE_NAME=e2e-v1.12 IMAGE_TAG=${GITHASH} SKIP_BUILD=y GINKGO_NODES=6 KUBE_VERSION=v1.12.10 REPORT_DIR=\$(pwd)/artifacts REPORT_PREFIX=v1.12.10_ ./hack/e2e.sh -- --preload-images", artifacts)
build("${MIRRORS} RUNNER_SUITE_NAME=e2e-v1.12 IMAGE_TAG=${GITHASH} SKIP_BUILD=y GINKGO_NODES=6 KUBE_VERSION=v1.12.10 REPORT_DIR=\$(pwd)/artifacts REPORT_PREFIX=v1.12.10_ ./hack/e2e.sh -- --preload-images --operator-killer", artifacts)
}
builds["E2E v1.12.10 AdvancedStatefulSet"] = {
build("${MIRRORS} RUNNER_SUITE_NAME=e2e-v1.12-advanced-statefulset IMAGE_TAG=${GITHASH} SKIP_BUILD=y GINKGO_NODES=6 KUBE_VERSION=v1.12.10 REPORT_DIR=\$(pwd)/artifacts REPORT_PREFIX=v1.12.10_advanced_statefulset ./hack/e2e.sh -- --preload-images --operator-features AdvancedStatefulSet=true", artifacts)
}
builds["E2E v1.18.0"] = {
build("${MIRRORS} RUNNER_SUITE_NAME=e2e-v1.18 IMAGE_TAG=${GITHASH} SKIP_BUILD=y GINKGO_NODES=6 KUBE_VERSION=v1.18.0 REPORT_DIR=\$(pwd)/artifacts REPORT_PREFIX=v1.18.0_ ./hack/e2e.sh -- -preload-images", artifacts)
build("${MIRRORS} RUNNER_SUITE_NAME=e2e-v1.18 IMAGE_TAG=${GITHASH} SKIP_BUILD=y GINKGO_NODES=6 KUBE_VERSION=v1.18.0 REPORT_DIR=\$(pwd)/artifacts REPORT_PREFIX=v1.18.0_ ./hack/e2e.sh -- -preload-images --operator-killer", artifacts)
}
builds["E2E v1.12.10 Serial"] = {
build("${MIRRORS} RUNNER_SUITE_NAME=e2e-v1.12-serial IMAGE_TAG=${GITHASH} SKIP_BUILD=y KUBE_VERSION=v1.12.10 REPORT_DIR=\$(pwd)/artifacts REPORT_PREFIX=v1.12.10_serial_ ./hack/e2e.sh -- --preload-images --ginkgo.focus='\\[Serial\\]' --install-operator=false", artifacts)
Expand Down
7 changes: 4 additions & 3 deletions tests/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,9 @@ import (
"os"
"strings"

"github.com/pingcap/tidb-operator/tests/slack"

utiloperator "github.com/pingcap/tidb-operator/tests/e2e/util/operator"
"github.com/pingcap/tidb-operator/tests/pkg/blockwriter"

"github.com/pingcap/tidb-operator/tests/slack"
"gopkg.in/yaml.v2"
"k8s.io/klog"
)
Expand Down Expand Up @@ -77,6 +76,8 @@ type Config struct {
E2EImage string `yaml:"e2e_image" json:"e2e_image"`

PreloadImages bool `yaml:"preload_images" json:"preload_images"`

OperatorKiller utiloperator.OperatorKillerConfig
}

// Nodes defines a series of nodes that belong to the same physical node.
Expand Down
4 changes: 4 additions & 0 deletions tests/e2e/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"flag"
"fmt"
"io/ioutil"
"time"

"github.com/pingcap/tidb-operator/tests"
v1 "k8s.io/api/core/v1"
Expand Down Expand Up @@ -46,6 +47,9 @@ func RegisterTiDBOperatorFlags(flags *flag.FlagSet) {
flags.StringVar(&TestConfig.ChartDir, "chart-dir", "", "chart dir")
flags.BoolVar(&TestConfig.PreloadImages, "preload-images", false, "if set, preload images in the bootstrap of e2e process")
flags.StringVar(&TestConfig.BackupImage, "backup-image", "", "backup image")
flags.BoolVar(&TestConfig.OperatorKiller.Enabled, "operator-killer", false, "whether to enable operator kill")
flags.DurationVar(&TestConfig.OperatorKiller.Interval, "operator-killer-interval", 5*time.Minute, "interval between operator kills")
flags.Float64Var(&TestConfig.OperatorKiller.JitterFactor, "operator-killer-jitter-factor", 1, "factor used to jitter operator kills")
}

func AfterReadingAllFlags() error {
Expand Down
24 changes: 24 additions & 0 deletions tests/e2e/e2e.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,12 @@ import (
e2econfig "github.com/pingcap/tidb-operator/tests/e2e/config"
utilimage "github.com/pingcap/tidb-operator/tests/e2e/util/image"
utilnode "github.com/pingcap/tidb-operator/tests/e2e/util/node"
utiloperator "github.com/pingcap/tidb-operator/tests/e2e/util/operator"
v1 "k8s.io/api/core/v1"
storagev1 "k8s.io/api/storage/v1"
apiextensionsclientset "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
runtimeutils "k8s.io/apimachinery/pkg/util/runtime"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/kubernetes"
Expand All @@ -58,6 +60,10 @@ import (
_ "k8s.io/kubernetes/test/e2e/framework/providers/gce"
)

var (
	// operatorKillerStopCh is closed by the SynchronizedAfterSuite hook when
	// it is non-nil. It is meant to be the stop channel handed to the
	// operator killer goroutine; a nil value means there is nothing to stop.
	operatorKillerStopCh chan struct{}
)

// This is modified from framework.SetupSuite().
// setupSuite is the boilerplate that can be used to setup ginkgo test suites, on the SynchronizedBeforeSuite step.
// There are certain operations we only want to run once per overall test invocation
Expand Down Expand Up @@ -256,6 +262,21 @@ var _ = ginkgo.SynchronizedBeforeSuite(func() []byte {
ginkgo.By("Installing tidb-operator")
oa.CleanOperatorOrDie(ocfg)
oa.DeployOperatorOrDie(ocfg)
if e2econfig.TestConfig.OperatorKiller.Enabled {
	// listOperatorPods returns the pods in the operator namespace that carry
	// the tidb-operator name label; these are the pods the killer deletes.
	listOperatorPods := func() ([]v1.Pod, error) {
		podList, err := kubeCli.CoreV1().Pods(ocfg.Namespace).List(metav1.ListOptions{
			LabelSelector: labels.SelectorFromSet(map[string]string{
				"app.kubernetes.io/name": "tidb-operator",
			}).String(),
		})
		if err != nil {
			return nil, err
		}
		return podList.Items, nil
	}
	operatorKiller := utiloperator.NewOperatorKiller(e2econfig.TestConfig.OperatorKiller, kubeCli, listOperatorPods)
	// Assign to the package-level variable with "=". Using ":=" here would
	// declare a new local that shadows it, leaving the package-level channel
	// nil so that the after-suite hook never closes it and the killer
	// goroutine is never asked to stop.
	operatorKillerStopCh = make(chan struct{})
	go operatorKiller.Run(operatorKillerStopCh)
}
} else {
ginkgo.By("Skip installing tidb-operator")
}
Expand All @@ -269,6 +290,9 @@ var _ = ginkgo.SynchronizedAfterSuite(func() {
framework.CleanupSuite()
}, func() {
framework.AfterSuiteActions()
if operatorKillerStopCh != nil {
close(operatorKillerStopCh)
}
})

// RunE2ETests checks configuration parameters (specified through flags) and then runs
Expand Down
70 changes: 70 additions & 0 deletions tests/e2e/util/operator/operator.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
// Copyright 2020 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package operator

import (
"time"

v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/kubernetes"
"k8s.io/kubernetes/test/e2e/framework"
)

// OperatorKillerConfig holds the settings that control the operator killer.
type OperatorKillerConfig struct {
	// Enabled reports whether the operator killer should be started.
	Enabled bool
	// Interval is the base delay between two rounds of operator pod deletion.
	Interval time.Duration
	// JitterFactor randomly stretches each delay, so operator pods are
	// deleted every [Interval, Interval * (1.0 + JitterFactor)].
	JitterFactor float64
}

// OperatorKiller simulates tidb-operator failures by deleting the operator's
// pods.
type OperatorKiller struct {
	// config carries the kill interval and jitter settings.
	config OperatorKillerConfig
	// client is the Kubernetes client used to delete pods.
	client kubernetes.Interface
	// podLister returns the pods that should be deleted on each round.
	podLister func() ([]v1.Pod, error)
}

// NewOperatorKiller builds an OperatorKiller that uses client to delete the
// pods returned by podLister, paced according to config.
func NewOperatorKiller(config OperatorKillerConfig, client kubernetes.Interface, podLister func() ([]v1.Pod, error)) *OperatorKiller {
	killer := new(OperatorKiller)
	killer.config = config
	killer.client = client
	killer.podLister = podLister
	return killer
}

// Run deletes every pod returned by the pod lister, repeating with a jittered
// interval until stopCh is closed. It blocks, so callers normally start it in
// its own goroutine.
//
// The first round is delayed by one jittered interval. Closing stopCh during
// that initial delay makes Run return right away instead of waiting the
// delay out.
func (k *OperatorKiller) Run(stopCh <-chan struct{}) {
	// wait.JitterUntil starts work immediately, so wait first. Use a timer in
	// a select rather than time.Sleep so the initial delay also honors stopCh.
	initialDelay := time.NewTimer(wait.Jitter(k.config.Interval, k.config.JitterFactor))
	select {
	case <-stopCh:
		initialDelay.Stop()
		return
	case <-initialDelay.C:
	}

	wait.JitterUntil(func() {
		pods, err := k.podLister()
		if err != nil {
			framework.Logf("failed to list operator pods: %v", err)
			return
		}
		// Deletion is best-effort: a failure on one pod is logged and the
		// remaining pods are still attempted.
		for _, pod := range pods {
			if err := k.client.CoreV1().Pods(pod.Namespace).Delete(pod.Name, &metav1.DeleteOptions{}); err != nil {
				framework.Logf("failed to delete pod %s/%s: %v", pod.Namespace, pod.Name, err)
				continue
			}
			framework.Logf("successfully deleted tidb-operator pod %s/%s", pod.Namespace, pod.Name)
		}
	}, k.config.Interval, k.config.JitterFactor, true, stopCh)
}

0 comments on commit 7d49f55

Please sign in to comment.