diff --git a/pkg/manager/member/tiflash_failover.go b/pkg/manager/member/tiflash_failover.go
index 569f9af7bbf..7fb0cfad045 100644
--- a/pkg/manager/member/tiflash_failover.go
+++ b/pkg/manager/member/tiflash_failover.go
@@ -18,12 +18,14 @@ import (
 	"time"
 
 	"github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1"
+	"github.com/pingcap/tidb-operator/pkg/util"
 	corev1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/client-go/tools/record"
 	"k8s.io/klog"
 )
 
+// TODO: reuse tikvFailover since we share the same logic
 type tiflashFailover struct {
 	tiflashFailoverPeriod time.Duration
 	recorder              record.EventRecorder
@@ -34,6 +36,16 @@ func NewTiFlashFailover(tiflashFailoverPeriod time.Duration, recorder record.Eve
 	return &tiflashFailover{tiflashFailoverPeriod, recorder}
 }
 
+func (tff *tiflashFailover) isPodDesired(tc *v1alpha1.TidbCluster, podName string) bool {
+	ordinals := tc.TiFlashStsDesiredOrdinals(true)
+	ordinal, err := util.GetOrdinalFromPodName(podName)
+	if err != nil {
+		klog.Errorf("unexpected pod name %q: %v", podName, err)
+		return false
+	}
+	return ordinals.Has(ordinal)
+}
+
 func (tff *tiflashFailover) Failover(tc *v1alpha1.TidbCluster) error {
 	ns := tc.GetNamespace()
 	tcName := tc.GetName()
@@ -43,6 +55,12 @@ func (tff *tiflashFailover) Failover(tc *v1alpha1.TidbCluster) error {
 		if store.LastTransitionTime.IsZero() {
 			continue
 		}
+		if !tff.isPodDesired(tc, podName) {
+			// We should ignore the store record of a deleted pod, otherwise the
+			// record may be added back to the failure stores before the store
+			// enters the Offline/Tombstone state.
+			continue
+		}
 		deadline := store.LastTransitionTime.Add(tff.tiflashFailoverPeriod)
 		exist := false
 		for _, failureStore := range tc.Status.TiFlash.FailureStores {
@@ -74,8 +92,15 @@ func (tff *tiflashFailover) Failover(tc *v1alpha1.TidbCluster) error {
 	return nil
 }
 
-func (tff *tiflashFailover) Recover(_ *v1alpha1.TidbCluster) {
-	// Do nothing now
+func (tff *tiflashFailover) Recover(tc *v1alpha1.TidbCluster) {
+	for key, failureStore := range tc.Status.TiFlash.FailureStores {
+		if !tff.isPodDesired(tc, failureStore.PodName) {
+			// If the pods were deleted, e.g. via the delete-slots feature of the
+			// advanced StatefulSet, we should remove the records of the undesired
+			// pods, otherwise extra replacement pods will be created.
+			delete(tc.Status.TiFlash.FailureStores, key)
+		}
+	}
 }
 
 type fakeTiFlashFailover struct{}
diff --git a/pkg/manager/member/tikv_failover.go b/pkg/manager/member/tikv_failover.go
index 036659a4719..559865856f8 100644
--- a/pkg/manager/member/tikv_failover.go
+++ b/pkg/manager/member/tikv_failover.go
@@ -18,6 +18,7 @@ import (
 	"time"
 
 	"github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1"
+	"github.com/pingcap/tidb-operator/pkg/util"
 	corev1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/client-go/tools/record"
@@ -34,6 +35,16 @@ func NewTiKVFailover(tikvFailoverPeriod time.Duration, recorder record.EventReco
 	return &tikvFailover{tikvFailoverPeriod, recorder}
 }
 
+func (tf *tikvFailover) isPodDesired(tc *v1alpha1.TidbCluster, podName string) bool {
+	ordinals := tc.TiKVStsDesiredOrdinals(true)
+	ordinal, err := util.GetOrdinalFromPodName(podName)
+	if err != nil {
+		klog.Errorf("unexpected pod name %q: %v", podName, err)
+		return false
+	}
+	return ordinals.Has(ordinal)
+}
+
 func (tf *tikvFailover) Failover(tc *v1alpha1.TidbCluster) error {
 	ns := tc.GetNamespace()
 	tcName := tc.GetName()
@@ -43,6 +54,12 @@ func (tf *tikvFailover) Failover(tc *v1alpha1.TidbCluster) error {
 		if store.LastTransitionTime.IsZero() {
 			continue
 		}
+		if !tf.isPodDesired(tc, podName) {
+			// We should ignore the store record of a deleted pod, otherwise the
+			// record may be added back to the failure stores before the store
+			// enters the Offline/Tombstone state.
+			continue
+		}
 		deadline := store.LastTransitionTime.Add(tf.tikvFailoverPeriod)
 		exist := false
 		for _, failureStore := range tc.Status.TiKV.FailureStores {
@@ -75,8 +92,15 @@ func (tf *tikvFailover) Failover(tc *v1alpha1.TidbCluster) error {
 	return nil
 }
 
-func (tf *tikvFailover) Recover(_ *v1alpha1.TidbCluster) {
-	// Do nothing now
+func (tf *tikvFailover) Recover(tc *v1alpha1.TidbCluster) {
+	for key, failureStore := range tc.Status.TiKV.FailureStores {
+		if !tf.isPodDesired(tc, failureStore.PodName) {
+			// If the pods were deleted, e.g. via the delete-slots feature of the
+			// advanced StatefulSet, we should remove the records of the undesired
+			// pods, otherwise extra replacement pods will be created.
+			delete(tc.Status.TiKV.FailureStores, key)
+		}
+	}
 }
 
 type fakeTiKVFailover struct{}
diff --git a/pkg/manager/member/tikv_failover_test.go b/pkg/manager/member/tikv_failover_test.go
index 477d698a97d..70f62c536b1 100644
--- a/pkg/manager/member/tikv_failover_test.go
+++ b/pkg/manager/member/tikv_failover_test.go
@@ -25,31 +25,12 @@ import (
 )
 
 func TestTiKVFailoverFailover(t *testing.T) {
-	g := NewGomegaWithT(t)
-
-	type testcase struct {
+	tests := []struct {
 		name     string
 		update   func(*v1alpha1.TidbCluster)
 		err      bool
-		expectFn func(*v1alpha1.TidbCluster)
-	}
-	testFn := func(test *testcase, t *testing.T) {
-		t.Log(test.name)
-		tc := newTidbClusterForPD()
-		tc.Spec.TiKV.MaxFailoverCount = pointer.Int32Ptr(3)
-		test.update(tc)
-		tikvFailover := newFakeTiKVFailover()
-
-		err := tikvFailover.Failover(tc)
-		if test.err {
-			g.Expect(err).To(HaveOccurred())
-		} else {
-			g.Expect(err).NotTo(HaveOccurred())
-		}
-		test.expectFn(tc)
-	}
-
-	tests := []testcase{
+		expectFn func(t *testing.T, tc *v1alpha1.TidbCluster)
+	}{
 		{
 			name: "normal",
 			update: func(tc *v1alpha1.TidbCluster) {
@@ -67,8 +48,8 @@ func TestTiKVFailoverFailover(t *testing.T) {
 				}
 			},
 			err: false,
-			expectFn: func(tc *v1alpha1.TidbCluster) {
-				g.Expect(int(tc.Spec.TiKV.Replicas)).To(Equal(3))
+			expectFn: func(t *testing.T, tc *v1alpha1.TidbCluster) {
+				g := NewGomegaWithT(t)
 				g.Expect(len(tc.Status.TiKV.FailureStores)).To(Equal(2))
 			},
 		},
@@ -80,8 +61,8 @@ func TestTiKVFailoverFailover(t *testing.T) {
 				}
 			},
 			err: false,
-			expectFn: func(tc *v1alpha1.TidbCluster) {
-				g.Expect(int(tc.Spec.TiKV.Replicas)).To(Equal(3))
+			expectFn: func(t *testing.T, tc *v1alpha1.TidbCluster) {
+				g := NewGomegaWithT(t)
 				g.Expect(len(tc.Status.TiKV.FailureStores)).To(Equal(0))
 			},
 		},
@@ -97,8 +78,8 @@ func TestTiKVFailoverFailover(t *testing.T) {
 				}
 			},
 			err: false,
-			expectFn: func(tc *v1alpha1.TidbCluster) {
-				g.Expect(int(tc.Spec.TiKV.Replicas)).To(Equal(3))
+			expectFn: func(t *testing.T, tc *v1alpha1.TidbCluster) {
+				g := NewGomegaWithT(t)
 				g.Expect(len(tc.Status.TiKV.FailureStores)).To(Equal(0))
 			},
 		},
@@ -113,8 +94,8 @@ func TestTiKVFailoverFailover(t *testing.T) {
 				}
 			},
 			err: false,
-			expectFn: func(tc *v1alpha1.TidbCluster) {
-				g.Expect(int(tc.Spec.TiKV.Replicas)).To(Equal(3))
+			expectFn: func(t *testing.T, tc *v1alpha1.TidbCluster) {
+				g := NewGomegaWithT(t)
 				g.Expect(len(tc.Status.TiKV.FailureStores)).To(Equal(0))
 			},
 		},
@@ -136,8 +117,8 @@ func TestTiKVFailoverFailover(t *testing.T) {
 				}
 			},
 			err: false,
-			expectFn: func(tc *v1alpha1.TidbCluster) {
-				g.Expect(int(tc.Spec.TiKV.Replicas)).To(Equal(3))
+			expectFn: func(t *testing.T, tc *v1alpha1.TidbCluster) {
+				g := NewGomegaWithT(t)
 				g.Expect(len(tc.Status.TiKV.FailureStores)).To(Equal(1))
 			},
 		},
@@ -147,17 +128,17 @@ func TestTiKVFailoverFailover(t *testing.T) {
 				tc.Status.TiKV.Stores = map[string]v1alpha1.TiKVStore{
 					"3": {
 						State:              v1alpha1.TiKVStateDown,
-						PodName:            "tikv-3",
+						PodName:            "tikv-0",
 						LastTransitionTime: metav1.Time{Time: time.Now().Add(-70 * time.Minute)},
 					},
-					"10": {
+					"4": {
 						State:              v1alpha1.TiKVStateUp,
-						PodName:            "tikv-10",
+						PodName:            "tikv-4",
 						LastTransitionTime: metav1.Time{Time: time.Now().Add(-70 * time.Minute)},
 					},
-					"11": {
+					"5": {
 						State:              v1alpha1.TiKVStateUp,
-						PodName:            "tikv-11",
+						PodName:            "tikv-5",
 						LastTransitionTime: metav1.Time{Time: time.Now().Add(-61 * time.Minute)},
 					},
 				}
@@ -173,8 +154,8 @@ func TestTiKVFailoverFailover(t *testing.T) {
 				}
 			},
 			err: false,
-			expectFn: func(tc *v1alpha1.TidbCluster) {
-				g.Expect(int(tc.Spec.TiKV.Replicas)).To(Equal(3))
+			expectFn: func(t *testing.T, tc *v1alpha1.TidbCluster) {
+				g := NewGomegaWithT(t)
 				g.Expect(len(tc.Status.TiKV.FailureStores)).To(Equal(3))
 			},
 		},
@@ -187,14 +168,14 @@ func TestTiKVFailoverFailover(t *testing.T) {
 						PodName:            "tikv-3",
 						LastTransitionTime: metav1.Time{Time: time.Now().Add(-70 * time.Minute)},
 					},
-					"10": {
+					"4": {
 						State:              v1alpha1.TiKVStateDown,
-						PodName:            "tikv-10",
+						PodName:            "tikv-4",
 						LastTransitionTime: metav1.Time{Time: time.Now().Add(-70 * time.Minute)},
 					},
-					"11": {
+					"5": {
 						State:              v1alpha1.TiKVStateUp,
-						PodName:            "tikv-11",
+						PodName:            "tikv-5",
 						LastTransitionTime: metav1.Time{Time: time.Now().Add(-61 * time.Minute)},
 					},
 				}
@@ -210,8 +191,8 @@ func TestTiKVFailoverFailover(t *testing.T) {
 				}
 			},
 			err: false,
-			expectFn: func(tc *v1alpha1.TidbCluster) {
-				g.Expect(int(tc.Spec.TiKV.Replicas)).To(Equal(3))
+			expectFn: func(t *testing.T, tc *v1alpha1.TidbCluster) {
+				g := NewGomegaWithT(t)
 				g.Expect(len(tc.Status.TiKV.FailureStores)).To(Equal(3))
 			},
 		},
@@ -219,19 +200,19 @@ func TestTiKVFailoverFailover(t *testing.T) {
 			name: "exceed max failover count2",
 			update: func(tc *v1alpha1.TidbCluster) {
 				tc.Status.TiKV.Stores = map[string]v1alpha1.TiKVStore{
-					"12": {
+					"0": {
 						State:              v1alpha1.TiKVStateDown,
-						PodName:            "tikv-12",
+						PodName:            "tikv-0",
 						LastTransitionTime: metav1.Time{Time: time.Now().Add(-70 * time.Minute)},
 					},
-					"13": {
+					"4": {
 						State:              v1alpha1.TiKVStateDown,
-						PodName:            "tikv-13",
+						PodName:            "tikv-4",
 						LastTransitionTime: metav1.Time{Time: time.Now().Add(-61 * time.Minute)},
 					},
-					"14": {
+					"5": {
 						State:              v1alpha1.TiKVStateDown,
-						PodName:            "tikv-14",
+						PodName:            "tikv-5",
 						LastTransitionTime: metav1.Time{Time: time.Now().Add(-70 * time.Minute)},
 					},
 				}
@@ -251,8 +232,8 @@ func TestTiKVFailoverFailover(t *testing.T) {
 				}
 			},
 			err: false,
-			expectFn: func(tc *v1alpha1.TidbCluster) {
-				g.Expect(int(tc.Spec.TiKV.Replicas)).To(Equal(3))
+			expectFn: func(t *testing.T, tc *v1alpha1.TidbCluster) {
+				g := NewGomegaWithT(t)
 				g.Expect(len(tc.Status.TiKV.FailureStores)).To(Equal(3))
 			},
 		},
@@ -293,14 +274,29 @@ func TestTiKVFailoverFailover(t *testing.T) {
 				}
 			},
 			err: false,
-			expectFn: func(tc *v1alpha1.TidbCluster) {
-				g.Expect(int(tc.Spec.TiKV.Replicas)).To(Equal(3))
+			expectFn: func(t *testing.T, tc *v1alpha1.TidbCluster) {
+				g := NewGomegaWithT(t)
 				g.Expect(len(tc.Status.TiKV.FailureStores)).To(Equal(3))
 			},
 		},
 	}
 
-	for i := range tests {
-		testFn(&tests[i], t)
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			g := NewGomegaWithT(t)
+			tc := newTidbClusterForPD()
+			tc.Spec.TiKV.Replicas = 6
+			tc.Spec.TiKV.MaxFailoverCount = pointer.Int32Ptr(3)
+			tt.update(tc)
+			tikvFailover := newFakeTiKVFailover()
+
+			err := tikvFailover.Failover(tc)
+			if tt.err {
+				g.Expect(err).To(HaveOccurred())
+			} else {
+				g.Expect(err).NotTo(HaveOccurred())
+			}
+			tt.expectFn(t, tc)
+		})
 	}
 }
diff --git a/pkg/manager/member/tikv_member_manager.go b/pkg/manager/member/tikv_member_manager.go
index 086435171a5..3cbe932b28d 100644
--- a/pkg/manager/member/tikv_member_manager.go
+++ b/pkg/manager/member/tikv_member_manager.go
@@ -240,6 +240,10 @@ func (tkmm *tikvMemberManager) syncStatefulSetForTidbCluster(tc *v1alpha1.TidbCl
 		}
 	}
 
+	if len(tc.Status.TiKV.FailureStores) > 0 {
+		tkmm.tikvFailover.Recover(tc)
+	}
+
 	return updateStatefulSet(tkmm.setControl, tc, newSet, oldSet)
 }
 
diff --git a/tests/e2e/tidbcluster/stability.go b/tests/e2e/tidbcluster/stability.go
index 7aad2f35c67..47b10246932 100644
--- a/tests/e2e/tidbcluster/stability.go
+++ b/tests/e2e/tidbcluster/stability.go
@@ -30,6 +30,7 @@ import (
 	"github.com/pingcap/tidb-operator/tests"
 	e2econfig "github.com/pingcap/tidb-operator/tests/e2e/config"
 	e2eframework "github.com/pingcap/tidb-operator/tests/e2e/framework"
+	e2eutil "github.com/pingcap/tidb-operator/tests/e2e/util"
 	utilcloud "github.com/pingcap/tidb-operator/tests/e2e/util/cloud"
 	utilimage "github.com/pingcap/tidb-operator/tests/e2e/util/image"
 	utilnode "github.com/pingcap/tidb-operator/tests/e2e/util/node"
@@ -858,4 +859,116 @@ var _ = ginkgo.Describe("[tidb-operator][Stability]", func() {
 	})
 
+	ginkgo.Context("[Feature: AdvancedStatefulSet][Feature: AutoFailover] operator with advanced statefulset and short auto-failover periods", func() {
+		var ocfg *tests.OperatorConfig
+		var oa tests.OperatorActions
+		var genericCli client.Client
+		failoverPeriod := time.Minute
+
+		ginkgo.BeforeEach(func() {
+			ocfg = &tests.OperatorConfig{
+				Namespace:   ns,
+				ReleaseName: "operator",
+				Image:       cfg.OperatorImage,
+				Tag:         cfg.OperatorTag,
+				LogLevel:    "4",
+				TestMode:    true,
+				StringValues: map[string]string{
+					"controllerManager.pdFailoverPeriod":      failoverPeriod.String(),
+					"controllerManager.tidbFailoverPeriod":    failoverPeriod.String(),
+					"controllerManager.tikvFailoverPeriod":    failoverPeriod.String(),
+					"controllerManager.tiflashFailoverPeriod": failoverPeriod.String(),
+				},
+				Features: []string{
+					"AdvancedStatefulSet=true",
+				},
+			}
+			oa = tests.NewOperatorActions(cli, c, asCli, aggrCli, apiExtCli, tests.DefaultPollInterval, ocfg, e2econfig.TestConfig, nil, fw, f)
+			ginkgo.By("Installing CRDs")
+			oa.CleanCRDOrDie()
+			oa.InstallCRDOrDie(ocfg)
+			ginkgo.By("Installing tidb-operator")
+			oa.CleanOperatorOrDie(ocfg)
+			oa.DeployOperatorOrDie(ocfg)
+			var err error
+			genericCli, err = client.New(config, client.Options{Scheme: scheme.Scheme})
+			framework.ExpectNoError(err, "failed to create clientset")
+		})
+
+		// https://github.com/pingcap/tidb-operator/issues/1464
+		ginkgo.It("delete the failed pod via delete-slots feature of Advanced Statefulset after failover", func() {
+			ginkgo.By("Make sure we have at least 3 schedulable nodes")
+			nodeList := framework.GetReadySchedulableNodesOrDie(f.ClientSet)
+			gomega.Expect(len(nodeList.Items)).To(gomega.BeNumerically(">=", 3))
+
+			clusterName := "failover"
+			tc := fixture.GetTidbCluster(ns, clusterName, utilimage.TiDBV3Version)
+			tc.Spec.PD.Replicas = 1
+			tc.Spec.PD.Config.Schedule = &v1alpha1.PDScheduleConfig{
+				MaxStoreDownTime: pointer.StringPtr("1m"),
+			}
+			tc.Spec.TiDB.Replicas = 1
+			tc.Spec.TiKV.Replicas = 3
+			err := genericCli.Create(context.TODO(), tc)
+			framework.ExpectNoError(err)
+
+			ginkgo.By("Waiting for the tidb cluster to become ready")
+			err = oa.WaitForTidbClusterReady(tc, 30*time.Minute, 15*time.Second)
+			framework.ExpectNoError(err)
+
+			ginkgo.By("Fail a TiKV store")
+			podName := controller.TiKVMemberName(clusterName) + "-1"
+			f.ExecCommandInContainer(podName, "tikv", "sh", "-c", "rm -rf /var/lib/tikv/*")
+
+			ginkgo.By("Waiting for the store to be put into failure stores")
+			err = e2eutil.WaitForTidbClusterCondition(cli, tc.Namespace, tc.Name, time.Minute*5, func(tc *v1alpha1.TidbCluster) (bool, error) {
+				exist := false
+				for _, failureStore := range tc.Status.TiKV.FailureStores {
+					if failureStore.PodName == podName {
+						exist = true
+					}
+				}
+				return exist, nil
+			})
+			framework.ExpectNoError(err)
+
+			ginkgo.By("Wait for the new pod to be created")
+			newPodName := controller.TiKVMemberName(clusterName) + "-3"
+			err = wait.PollImmediate(time.Second*10, 1*time.Minute, func() (bool, error) {
+				_, err := c.CoreV1().Pods(ns).Get(newPodName, metav1.GetOptions{})
+				if err != nil && !apierrors.IsNotFound(err) {
+					return false, nil
+				}
+				return !apierrors.IsNotFound(err), nil
+			})
+			framework.ExpectNoError(err)
+
+			ginkgo.By(fmt.Sprintf("Deleting the failed pod %q via delete-slots", podName))
+			err = controller.GuaranteedUpdate(genericCli, tc, func() error {
+				if tc.Annotations == nil {
+					tc.Annotations = map[string]string{}
+				}
+				tc.Annotations[label.AnnTiKVDeleteSlots] = mustToString(sets.NewInt32(1))
+				return nil
+			})
+			framework.ExpectNoError(err)
+
+			ginkgo.By(fmt.Sprintf("Wait for the failed pod %q to be gone", podName))
+			err = e2epod.WaitForPodNotFoundInNamespace(c, podName, ns, time.Minute*5)
+			framework.ExpectNoError(err)
+
+			ginkgo.By("Wait for the record of the failed pod to be removed from failure stores")
+			err = e2eutil.WaitForTidbClusterCondition(cli, tc.Namespace, tc.Name, time.Minute*5, func(tc *v1alpha1.TidbCluster) (bool, error) {
+				exist := false
+				for _, failureStore := range tc.Status.TiKV.FailureStores {
+					if failureStore.PodName == podName {
+						exist = true
+					}
+				}
+				return !exist, nil
+			})
+			framework.ExpectNoError(err)
+		})
+	})
+
 })
 
diff --git a/tests/e2e/util/tidbcluster.go b/tests/e2e/util/tidbcluster.go
new file mode 100644
index 00000000000..eb78f5063d7
--- /dev/null
+++ b/tests/e2e/util/tidbcluster.go
@@ -0,0 +1,40 @@
+// Copyright 2020 PingCAP, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package util
+
+import (
+	"time"
+
+	"github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1"
+	"github.com/pingcap/tidb-operator/pkg/client/clientset/versioned"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/util/wait"
+	testutils "k8s.io/kubernetes/test/utils"
+)
+
+type TidbClusterCondition func(tc *v1alpha1.TidbCluster) (bool, error)
+
+// WaitForTidbClusterCondition waits for a TidbCluster to match the given condition.
+func WaitForTidbClusterCondition(c versioned.Interface, ns, name string, timeout time.Duration, condition TidbClusterCondition) error {
+	return wait.PollImmediate(time.Second*10, timeout, func() (bool, error) {
+		tc, err := c.PingcapV1alpha1().TidbClusters(ns).Get(name, metav1.GetOptions{})
+		if err != nil {
+			if testutils.IsRetryableAPIError(err) {
+				return false, nil
+			}
+			return false, err
+		}
+		return condition(tc)
+	})
+}