Skip to content

Commit

Permalink
Fix stability tidb pause case (#542)
Browse files Browse the repository at this point in the history
  • Loading branch information
shuijing198799 authored and weekface committed Jun 1, 2019
1 parent 37d6933 commit 46695be
Show file tree
Hide file tree
Showing 3 changed files with 89 additions and 64 deletions.
142 changes: 78 additions & 64 deletions tests/actions.go
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,9 @@ type OperatorActions interface {
GetTidbMemberAssignedNodesOrDie(info *TidbClusterConfig) map[string]string
CheckTidbMemberAssignedNodes(info *TidbClusterConfig, oldAssignedNodes map[string]string) error
CheckTidbMemberAssignedNodesOrDie(info *TidbClusterConfig, oldAssignedNodes map[string]string)
SetPartitionAnnotation(tcName string, nameSpace string, ordinal int) error
CheckManualPauseTiDB(info *TidbClusterConfig) error
CheckManualPauseTiDBOrDie(info *TidbClusterConfig)
}

type operatorActions struct {
Expand Down Expand Up @@ -659,7 +662,7 @@ func (oa *operatorActions) CheckTidbClusterStatus(info *TidbClusterConfig) error

ns := info.Namespace
tcName := info.ClusterName
if err := wait.Poll(oa.pollInterval, 35*time.Minute, func() (bool, error) {
if err := wait.Poll(oa.pollInterval, 30*time.Minute, func() (bool, error) {
var tc *v1alpha1.TidbCluster
var err error
if tc, err = oa.cli.PingcapV1alpha1().TidbClusters(ns).Get(tcName, metav1.GetOptions{}); err != nil {
Expand Down Expand Up @@ -855,7 +858,7 @@ func (oa *operatorActions) CheckScaledCorrectly(info *TidbClusterConfig, podUIDs
})
}

func setPartitionAnnotation(tcName string, nameSpace string, ordinal int) error {
func (oa *operatorActions) SetPartitionAnnotation(tcName string, nameSpace string, ordinal int) error {
// add annotation to pause statefulset upgrade process
cmd := fmt.Sprintf("kubectl annotate tc %s -n %s tidb.pingcap.com/tidb-partition=%d --overwrite",
tcName, nameSpace, ordinal)
Expand All @@ -875,19 +878,6 @@ func (oa *operatorActions) UpgradeTidbCluster(info *TidbClusterConfig) error {
}
oa.EmitEvent(info, "UpgradeTidbCluster")

// get tidbSet from apiserver
tidbSetName := controller.TiDBMemberName(info.ClusterName)
tidbSet, err := oa.kubeCli.AppsV1beta1().StatefulSets(info.Namespace).Get(tidbSetName, metav1.GetOptions{})
if err != nil {
return pingcapErrors.Wrapf(err, "failed to get stateful set [%s/%s] setName %s", info.Namespace, info.ClusterName, tidbSetName)
}

// add annotation to pause statefulset upgrade process
err = setPartitionAnnotation(info.ClusterName, info.Namespace, int(tidbSet.Status.Replicas-1))
if err != nil {
return pingcapErrors.Wrapf(err, "failed to add annotation to [%s/%s]", info.Namespace, info.ClusterName)
}

cmd := oa.getHelmUpgradeClusterCmd(info, nil)
glog.Info("[UPGRADE] " + cmd)
res, err := exec.Command("/bin/sh", "-c", cmd).CombinedOutput()
Expand Down Expand Up @@ -1056,38 +1046,6 @@ func (oa *operatorActions) tidbMembersReadyFn(tc *v1alpha1.TidbCluster) (bool, e
tcName := tc.GetName()
ns := tc.GetNamespace()
tidbSetName := controller.TiDBMemberName(tcName)
tidbUpgradeAnnotationStr, ok := tc.Annotations[label.AnnTiDBPartition]
if !ok {
tidbUpgradeAnnotationStr = "0"
}

tidbUpgradeAnnotation, err := strconv.ParseInt(tidbUpgradeAnnotationStr, 10, 32)
if err != nil {
return false, nil
}

pauseCorrect := func(set *v1beta1.StatefulSet) bool {
return (*set.Spec.UpdateStrategy.RollingUpdate.Partition) >= int32(tidbUpgradeAnnotation)
}

upgradePaused := func() bool {

podName := fmt.Sprintf("%s-%d", controller.TiDBMemberName(tc.Name), tidbUpgradeAnnotation)

tidbPod, err := oa.kubeCli.CoreV1().Pods(ns).Get(podName, metav1.GetOptions{})
if err != nil {
glog.Errorf("fail to get tidb po name %s namespace %s ", podName, ns)
return false
}
if tidbPod.Labels[v1beta1.ControllerRevisionHashLabelKey] == tc.Status.TiDB.StatefulSet.UpdateRevision &&
tc.Status.TiDB.Phase == v1alpha1.UpgradePhase {
if member, ok := tc.Status.TiDB.Members[tidbPod.Name]; ok && member.Health {
return true
}
}

return false
}

tidbSet, err := oa.kubeCli.AppsV1beta1().StatefulSets(ns).Get(tidbSetName, metav1.GetOptions{})
if err != nil {
Expand Down Expand Up @@ -1122,23 +1080,6 @@ func (oa *operatorActions) tidbMembersReadyFn(tc *v1alpha1.TidbCluster) (bool, e
return false, nil
}

if upgradePaused() {

time.Sleep(30 * time.Second)

if !pauseCorrect(tidbSet) {
return false, fmt.Errorf("pause partition is not correct in upgrade phase [%s/%s] partition %d annotation %d",
ns, tidbSetName, (*tidbSet.Spec.UpdateStrategy.RollingUpdate.Partition), tidbUpgradeAnnotation)
}

err := setPartitionAnnotation(tcName, ns, 0)
if err != nil {
glog.Errorf("fail to set annotation for [%s/%s]", ns, tidbSetName)
return false, nil
}
return false, nil
}

if c, ok := getMemberContainer(oa.kubeCli, ns, tidbSetName); !ok || tc.Spec.TiDB.Image != c.Image {
glog.Infof("statefulset: %s/%s .spec.template.spec.containers[name=tidb].image(%s) != %s",
ns, tidbSetName, c.Image, tc.Spec.TiDB.Image)
Expand Down Expand Up @@ -2431,3 +2372,76 @@ func (oa *operatorActions) getHelmUpgradeClusterCmd(info *TidbClusterConfig, set

return cmd
}

func (oa *operatorActions) CheckManualPauseTiDB(info *TidbClusterConfig) error {

var tc *v1alpha1.TidbCluster
var tidbSet *v1beta1.StatefulSet
var err error
ns := info.Namespace

// set partition annotation to protect tidb pod
if err = oa.SetPartitionAnnotation(info.ClusterName, ns, 1); err != nil {
return fmt.Errorf("failed to SetPartitionAnnotation: [%s/%s], %v", ns, info.ClusterName, err)
}

fn := func() (bool, error) {

if tc, err = oa.cli.PingcapV1alpha1().TidbClusters(ns).Get(info.ClusterName, metav1.GetOptions{}); err != nil {
glog.Infof("failed to get tidbcluster: [%s/%s], %v", ns, info.ClusterName, err)
return false, nil
}

podName := fmt.Sprintf("%s-%d", controller.TiDBMemberName(tc.Name), 1)

tidbPod, err := oa.kubeCli.CoreV1().Pods(ns).Get(podName, metav1.GetOptions{})
if err != nil {
glog.Infof("fail to get pod in CheckManualPauseTiDB [%s/%s]", ns, podName)
return false, nil
}

if tidbPod.Labels[v1beta1.ControllerRevisionHashLabelKey] == tc.Status.TiDB.StatefulSet.UpdateRevision &&
tc.Status.TiDB.Phase == v1alpha1.UpgradePhase {
if member, ok := tc.Status.TiDB.Members[tidbPod.Name]; !ok || !member.Health {
glog.Infof("wait for tidb pod [%s/%s] ready member health %t ok %t", ns, podName, member.Health, ok)
} else {
return true, nil
}
} else {
glog.Infof("tidbset is not in upgrade phase or pod is not upgrade done [%s/%s]", ns, podName)
}

return false, nil
}

// wait for the tidb statefulset is upgrade to the protect one
if err = wait.Poll(DefaultPollInterval, DefaultPollTimeout, fn); err != nil {
return fmt.Errorf("fail to upgrade to annotation TiDB pod : %v", err)
}

time.Sleep(30 * time.Second)

tidbSetName := controller.TiDBMemberName(info.ClusterName)
if tidbSet, err = oa.kubeCli.AppsV1beta1().StatefulSets(ns).Get(tidbSetName, metav1.GetOptions{}); err != nil {
return fmt.Errorf("failed to get statefulset: [%s/%s], %v", ns, tidbSetName, err)
}

if (*tidbSet.Spec.UpdateStrategy.RollingUpdate.Partition) < 1 {
return fmt.Errorf("pause partition is not correct in upgrade phase [%s/%s] partition %d annotation %d",
ns, tidbSetName, (*tidbSet.Spec.UpdateStrategy.RollingUpdate.Partition), 1)
}

if err = oa.SetPartitionAnnotation(tc.Name, ns, 0); err != nil {
return fmt.Errorf("fail to set annotation for [%s/%s]", ns, tidbSetName)
}

return nil
}

func (oa *operatorActions) CheckManualPauseTiDBOrDie(info *TidbClusterConfig) {
// add annotation to pause statefulset upgrade process and check
err := oa.CheckManualPauseTiDB(info)
if err != nil {
slack.NotifyAndPanic(err)
}
}
6 changes: 6 additions & 0 deletions tests/cmd/e2e/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,12 @@ func main() {
glog.Fatal(err)
}
}

// only check manual pause for 1 cluster
if len(clusterInfos) >= 1 {
oa.CheckManualPauseTiDBOrDie(clusterInfos[0])
}

for _, clusterInfo := range clusterInfos {
if err = oa.CheckTidbClusterStatus(clusterInfo); err != nil {
glog.Fatal(err)
Expand Down
5 changes: 5 additions & 0 deletions tests/cmd/stability/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -254,8 +254,13 @@ func run(oa tests.OperatorActions,
cluster2.UpgradeAll(firstUpgradeVersion)
oa.UpgradeTidbClusterOrDie(cluster1)
oa.UpgradeTidbClusterOrDie(cluster2)

// check pause upgrade feature in cluster2
oa.CheckManualPauseTiDBOrDie(cluster2)

oa.CheckTidbClusterStatusOrDie(cluster1)
oa.CheckTidbClusterStatusOrDie(cluster2)

oa.CheckTidbMemberAssignedNodesOrDie(cluster1, assignedNodes1)
oa.CheckTidbMemberAssignedNodesOrDie(cluster2, assignedNodes2)

Expand Down

0 comments on commit 46695be

Please sign in to comment.