Skip to content

Commit

Permalink
add a serial test for stable scheduling
Browse files Browse the repository at this point in the history
  • Loading branch information
cofyc committed Mar 18, 2020
1 parent 0119c69 commit c8fafc3
Show file tree
Hide file tree
Showing 4 changed files with 111 additions and 59 deletions.
64 changes: 9 additions & 55 deletions tests/actions.go
Original file line number Diff line number Diff line change
Expand Up @@ -230,10 +230,6 @@ type OperatorActions interface {
LabelNodesOrDie()
CheckDisasterTolerance(info *TidbClusterConfig) error
CheckDisasterToleranceOrDie(info *TidbClusterConfig)
GetTidbMemberAssignedNodes(info *TidbClusterConfig) (map[string]string, error)
GetTidbMemberAssignedNodesOrDie(info *TidbClusterConfig) map[string]string
CheckTidbMemberAssignedNodes(info *TidbClusterConfig, oldAssignedNodes map[string]string) error
CheckTidbMemberAssignedNodesOrDie(info *TidbClusterConfig, oldAssignedNodes map[string]string)
CheckUpgradeComplete(info *TidbClusterConfig) error
CheckUpgradeCompleteOrDie(info *TidbClusterConfig)
CheckInitSQL(info *TidbClusterConfig) error
Expand Down Expand Up @@ -410,10 +406,7 @@ func (oi *OperatorConfig) OperatorHelmSetString(m map[string]string) string {
set := map[string]string{
"operatorImage": oi.Image,
"controllerManager.autoFailover": "true",
"scheduler.kubeSchedulerImageName": oi.SchedulerImage,
"controllerManager.logLevel": oi.LogLevel,
"scheduler.logLevel": "4",
"imagePullPolicy": string(oi.ImagePullPolicy),
"testMode": strconv.FormatBool(oi.TestMode),
"admissionWebhook.cabundle": oi.Cabundle,
"admissionWebhook.create": strconv.FormatBool(oi.WebhookEnabled),
Expand All @@ -422,6 +415,15 @@ func (oi *OperatorConfig) OperatorHelmSetString(m map[string]string) string {
"admissionWebhook.mutation.pingcapResources": strconv.FormatBool(oi.DefaultingEnabled),
"admissionWebhook.validation.pingcapResources": strconv.FormatBool(oi.ValidatingEnabled),
}
if oi.LogLevel != "" {
set["controllerManager.logLevel"] = oi.LogLevel
}
if oi.SchedulerImage != "" {
set["scheduler.kubeSchedulerImageName"] = oi.SchedulerImage
}
if string(oi.ImagePullPolicy) != "" {
set["imagePullPolicy"] = string(oi.ImagePullPolicy)
}
if oi.ControllerManagerReplicas != nil {
set["controllerManager.replicas"] = strconv.Itoa(*oi.ControllerManagerReplicas)
}
Expand Down Expand Up @@ -893,54 +895,6 @@ func (oa *operatorActions) CleanTidbClusterOrDie(info *TidbClusterConfig) {
}
}

// GetTidbMemberAssignedNodes lists the TiDB pods of the given cluster and
// returns a map from pod name to the node each pod is scheduled on.
func (oa *operatorActions) GetTidbMemberAssignedNodes(info *TidbClusterConfig) (map[string]string, error) {
	ns, tcName := info.Namespace, info.ClusterName
	// Select only the TiDB component pods of this cluster instance.
	selector := labels.SelectorFromSet(
		label.New().Instance(tcName).Component(label.TiDBLabelVal).Labels()).String()
	podList, err := oa.kubeCli.CoreV1().Pods(ns).List(metav1.ListOptions{LabelSelector: selector})
	if err != nil {
		klog.Errorf("failed to get tidb pods: %s/%s, %v", ns, tcName, err)
		return nil, err
	}
	nodes := make(map[string]string, len(podList.Items))
	for _, pod := range podList.Items {
		nodes[pod.Name] = pod.Spec.NodeName
	}
	return nodes, nil
}

// GetTidbMemberAssignedNodesOrDie is the fatal variant of
// GetTidbMemberAssignedNodes: on error it notifies slack and panics.
func (oa *operatorActions) GetTidbMemberAssignedNodesOrDie(info *TidbClusterConfig) map[string]string {
	nodes, err := oa.GetTidbMemberAssignedNodes(info)
	if err != nil {
		slack.NotifyAndPanic(err)
	}
	return nodes
}

// CheckTidbMemberAssignedNodes verifies that every TiDB member recorded in
// oldAssignedNodes is still scheduled on the same node it was on before.
// It returns an error describing the first member that moved or disappeared.
func (oa *operatorActions) CheckTidbMemberAssignedNodes(info *TidbClusterConfig, oldAssignedNodes map[string]string) error {
	klog.Infof("checking tidb member [%s/%s] assigned nodes", info.Namespace, info.ClusterName)
	current, err := oa.GetTidbMemberAssignedNodes(info)
	if err != nil {
		return err
	}
	for member, want := range oldAssignedNodes {
		got, found := current[member]
		if !found || got != want {
			return fmt.Errorf("tidb member %s is not scheduled to %s, new node: %s", member, want, got)
		}
	}
	return nil
}

// CheckTidbMemberAssignedNodesOrDie is the fatal variant of
// CheckTidbMemberAssignedNodes: on error it notifies slack and panics.
func (oa *operatorActions) CheckTidbMemberAssignedNodesOrDie(info *TidbClusterConfig, oldAssignedNodes map[string]string) {
	err := oa.CheckTidbMemberAssignedNodes(info, oldAssignedNodes)
	if err != nil {
		slack.NotifyAndPanic(err)
	}
}

func (oa *operatorActions) CheckTidbClusterStatus(info *TidbClusterConfig) error {
klog.Infof("checking tidb cluster [%s/%s] status", info.Namespace, info.ClusterName)

Expand Down
2 changes: 0 additions & 2 deletions tests/cmd/stability/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -174,12 +174,10 @@ func run() {
oa.RegisterWebHookAndServiceOrDie(ocfg.WebhookConfigName, namespace, ocfg.WebhookServiceName, certCtx)
ctx, cancel := context.WithCancel(context.Background())
for _, cluster := range clusters {
assignedNodes := oa.GetTidbMemberAssignedNodesOrDie(cluster)
cluster.UpgradeAll(upgradeVersion)
oa.UpgradeTidbClusterOrDie(cluster)
oa.CheckUpgradeOrDie(ctx, cluster)
oa.CheckTidbClusterStatusOrDie(cluster)
oa.CheckTidbMemberAssignedNodesOrDie(cluster, assignedNodes)
}

// configuration change
Expand Down
102 changes: 102 additions & 0 deletions tests/e2e/tidbcluster/serial.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ import (
"github.com/pingcap/tidb-operator/tests/pkg/fixture"
v1 "k8s.io/api/core/v1"
apiextensionsclientset "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/util/sets"
Expand Down Expand Up @@ -106,6 +107,107 @@ var _ = ginkgo.Describe("[tidb-operator][Serial]", func() {
}
})

ginkgo.Context("tidb-operator with default values", func() {
	var ocfg *tests.OperatorConfig
	var oa tests.OperatorActions
	var genericCli client.Client

	ginkgo.BeforeEach(func() {
		// Deploy the operator with default helm values (no scheduler image,
		// log level or pull-policy overrides) so the defaults are exercised.
		ocfg = &tests.OperatorConfig{
			Namespace:   "pingcap",
			ReleaseName: "operator",
			Image:       cfg.OperatorImage,
			Tag:         cfg.OperatorTag,
		}
		oa = tests.NewOperatorActions(cli, c, asCli, aggrCli, apiExtCli, tests.DefaultPollInterval, ocfg, e2econfig.TestConfig, nil, fw, f)
		ginkgo.By("Installing CRDs")
		oa.CleanCRDOrDie()
		oa.InstallCRDOrDie(ocfg)
		ginkgo.By("Installing tidb-operator")
		oa.CleanOperatorOrDie(ocfg)
		oa.DeployOperatorOrDie(ocfg)
		var err error
		genericCli, err = client.New(config, client.Options{Scheme: scheme.Scheme})
		framework.ExpectNoError(err, "failed to create clientset")
	})

	ginkgo.AfterEach(func() {
		ginkgo.By("Uninstall tidb-operator")
		oa.CleanOperatorOrDie(ocfg)
		ginkgo.By("Uninstalling CRDs")
		oa.CleanCRDOrDie()
	})

	// There is no guarantee, but TiDB pods should be assigned back to their
	// previous nodes if no other pods occupy those positions.
	// See docs/design-proposals/tidb-stable-scheduling.md
	ginkgo.It("[Feature: StableScheduling] TiDB pods should be scheduled to previous nodes", func() {
		clusterName := "tidb-scheduling"
		tc := fixture.GetTidbCluster(ns, clusterName, utilimage.TiDBV3Version)
		tc.Spec.PD.Replicas = 1
		tc.Spec.TiKV.Replicas = 1
		tc.Spec.TiDB.Replicas = 3
		err := genericCli.Create(context.TODO(), tc)
		framework.ExpectNoError(err)
		err = oa.WaitForTidbClusterReady(tc, 30*time.Minute, 15*time.Second)
		framework.ExpectNoError(err)

		// Record the TiDB pods (and their nodes) before triggering a rolling
		// update, so the new pods can be compared against them.
		listOptions := metav1.ListOptions{
			LabelSelector: labels.SelectorFromSet(
				label.New().Instance(clusterName).Component(label.TiDBLabelVal).Labels()).String(),
		}
		oldPodList, err := c.CoreV1().Pods(ns).List(listOptions)
		framework.ExpectNoError(err)

		ginkgo.By("Update tidb configuration")
		// Changing any config value forces the TiDB pods to be recreated.
		err = controller.GuaranteedUpdate(genericCli, tc, func() error {
			tokenLimit := uint(2000)
			tc.Spec.TiDB.Config.TokenLimit = &tokenLimit
			return nil
		})
		framework.ExpectNoError(err)

		ginkgo.By("Waiting for all tidb pods are recreated and assigned to the same node")
		// Look up the pre-update pod with the same name; index into Items to
		// return a stable pointer rather than the address of a loop copy.
		getOldPodByName := func(pod *v1.Pod) *v1.Pod {
			for i := range oldPodList.Items {
				if oldPodList.Items[i].Name == pod.Name {
					return &oldPodList.Items[i]
				}
			}
			return nil
		}
		err = wait.PollImmediate(time.Second*5, time.Minute*15, func() (bool, error) {
			newPodList, err := c.CoreV1().Pods(ns).List(listOptions)
			if err != nil && !apierrors.IsNotFound(err) {
				return false, err
			}
			if apierrors.IsNotFound(err) {
				return false, nil
			}
			if len(newPodList.Items) != len(oldPodList.Items) {
				return false, nil
			}
			for _, newPod := range newPodList.Items {
				oldPod := getOldPodByName(&newPod)
				if oldPod == nil {
					// A pod name that did not exist before the update is a
					// hard failure, not a transient state.
					return false, fmt.Errorf("found an unexpected pod: %q", newPod.Name)
				}
				if oldPod.UID == newPod.UID {
					// not recreated yet
					return false, nil
				}
				if oldPod.Spec.NodeName != newPod.Spec.NodeName {
					// recreated but assigned to another node
					return false, fmt.Errorf("pod %q recreated but not assigned to previous node %q, got %q", oldPod.Name, oldPod.Spec.NodeName, newPod.Spec.NodeName)
				}
			}
			return true, nil
		})
		framework.ExpectNoError(err)
	})
})

// tidb-operator with AdvancedStatefulSet feature enabled
ginkgo.Context("[Feature: AdvancedStatefulSet][Feature: Webhook]", func() {
var ocfg *tests.OperatorConfig
Expand Down
2 changes: 0 additions & 2 deletions tests/e2e/tidbcluster/tidbcluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -244,12 +244,10 @@ var _ = ginkgo.Describe("[tidb-operator] TiDBCluster", func() {
upgradeVersions := cfg.GetUpgradeTidbVersionsOrDie()
ginkgo.By(fmt.Sprintf("Upgrading tidb cluster from %s to %s", cluster.ClusterVersion, upgradeVersions[0]))
ctx, cancel := context.WithCancel(context.Background())
assignedNodes := oa.GetTidbMemberAssignedNodesOrDie(&cluster)
cluster.UpgradeAll(upgradeVersions[0])
oa.UpgradeTidbClusterOrDie(&cluster)
oa.CheckUpgradeOrDie(ctx, &cluster)
oa.CheckTidbClusterStatusOrDie(&cluster)
oa.CheckTidbMemberAssignedNodesOrDie(&cluster, assignedNodes)
cancel()

ginkgo.By("Check webhook is still running")
Expand Down

0 comments on commit c8fafc3

Please sign in to comment.