diff --git a/pkg/autoscaler/autoscaler/autoscaler_manager.go b/pkg/autoscaler/autoscaler/autoscaler_manager.go
index 3bbc7c7454..16784cee1c 100644
--- a/pkg/autoscaler/autoscaler/autoscaler_manager.go
+++ b/pkg/autoscaler/autoscaler/autoscaler_manager.go
@@ -15,12 +15,14 @@ package autoscaler
 
 import (
 	"fmt"
+	"strings"
 
 	"github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1"
 	"github.com/pingcap/tidb-operator/pkg/client/clientset/versioned"
 	informers "github.com/pingcap/tidb-operator/pkg/client/informers/externalversions"
 	v1alpha1listers "github.com/pingcap/tidb-operator/pkg/client/listers/pingcap/v1alpha1"
 	"github.com/pingcap/tidb-operator/pkg/controller"
+	corev1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/api/errors"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
@@ -80,7 +82,7 @@ func (am *autoScalerManager) Sync(tac *v1alpha1.TidbClusterAutoScaler) error {
 	if err := am.syncAutoScaling(tc, tac); err != nil {
 		return err
 	}
-	if err := am.syncTidbClusterReplicas(tc, oldTc); err != nil {
+	if err := am.syncTidbClusterReplicas(tac, tc, oldTc); err != nil {
 		return err
 	}
 	return am.syncAutoScalingStatus(tc, oldTc, tac)
@@ -102,7 +104,7 @@ func (am *autoScalerManager) syncAutoScaling(tc *v1alpha1.TidbCluster, tac *v1al
 	return nil
 }
 
-func (am *autoScalerManager) syncTidbClusterReplicas(tc *v1alpha1.TidbCluster, oldTc *v1alpha1.TidbCluster) error {
+func (am *autoScalerManager) syncTidbClusterReplicas(tac *v1alpha1.TidbClusterAutoScaler, tc *v1alpha1.TidbCluster, oldTc *v1alpha1.TidbCluster) error {
 	if tc.Spec.TiDB.Replicas == oldTc.Spec.TiDB.Replicas && tc.Spec.TiKV.Replicas == oldTc.Spec.TiKV.Replicas {
 		return nil
 	}
@@ -111,6 +113,15 @@ func (am *autoScalerManager) syncTidbClusterReplicas(tc *v1alpha1.TidbCluster, o
 	if err != nil {
 		return err
 	}
+	reason := fmt.Sprintf("Successful %s", strings.Title("auto-scaling"))
+	msg := ""
+	if tc.Spec.TiDB.Replicas != oldTc.Spec.TiDB.Replicas {
+		msg = fmt.Sprintf("%s auto-scaling tidb from %d to %d", msg, oldTc.Spec.TiDB.Replicas, tc.Spec.TiDB.Replicas)
+	}
+	if tc.Spec.TiKV.Replicas != oldTc.Spec.TiKV.Replicas {
+		msg = fmt.Sprintf("%s auto-scaling tikv from %d to %d", msg, oldTc.Spec.TiKV.Replicas, tc.Spec.TiKV.Replicas)
+	}
+	am.recorder.Event(tac, corev1.EventTypeNormal, reason, msg)
 	return nil
 }
 
diff --git a/pkg/controller/tidbcluster/tidb_cluster_controller.go b/pkg/controller/tidbcluster/tidb_cluster_controller.go
index a7dd4d3af5..d66aba0ee7 100644
--- a/pkg/controller/tidbcluster/tidb_cluster_controller.go
+++ b/pkg/controller/tidbcluster/tidb_cluster_controller.go
@@ -109,9 +109,9 @@ func NewController(
 	tikvScaler := mm.NewTiKVScaler(pdControl, pvcInformer.Lister(), pvcControl, podInformer.Lister())
 	tiflashScaler := mm.NewTiFlashScaler(pdControl, pvcInformer.Lister(), pvcControl, podInformer.Lister())
 	pdFailover := mm.NewPDFailover(cli, pdControl, pdFailoverPeriod, podInformer.Lister(), podControl, pvcInformer.Lister(), pvcControl, pvInformer.Lister(), recorder)
-	tikvFailover := mm.NewTiKVFailover(tikvFailoverPeriod)
+	tikvFailover := mm.NewTiKVFailover(tikvFailoverPeriod, recorder)
+	tidbFailover := mm.NewTiDBFailover(tidbFailoverPeriod, recorder)
 	tiflashFailover := mm.NewTiFlashFailover(tiflashFailoverPeriod)
-	tidbFailover := mm.NewTiDBFailover(tidbFailoverPeriod)
 	pdUpgrader := mm.NewPDUpgrader(pdControl, podControl, podInformer.Lister())
 	tikvUpgrader := mm.NewTiKVUpgrader(pdControl, podControl, podInformer.Lister())
 	tiflashUpgrader := mm.NewTiFlashUpgrader(pdControl, podControl, podInformer.Lister())
diff --git a/pkg/manager/member/failover.go b/pkg/manager/member/failover.go
index 7744ee7a80..7a71f1015e 100644
--- a/pkg/manager/member/failover.go
+++ b/pkg/manager/member/failover.go
@@ -15,6 +15,11 @@ package member
 
 import "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1"
 
+const (
+	unHealthEventReason     = "Unhealthy"
+	unHealthEventMsgPattern = "%s pod[%s] is unhealthy, msg:%s"
+)
+
 // Failover implements the logic for pd/tikv/tidb's failover and recovery.
 type Failover interface {
 	Failover(*v1alpha1.TidbCluster) error
diff --git a/pkg/manager/member/pd_failover.go b/pkg/manager/member/pd_failover.go
index 0d668eea23..51526f1926 100644
--- a/pkg/manager/member/pd_failover.go
+++ b/pkg/manager/member/pd_failover.go
@@ -140,8 +140,8 @@ func (pf *pdFailover) tryToMarkAPeerAsFailure(tc *v1alpha1.TidbCluster) error {
 		return err
 	}
 
-	pf.recorder.Eventf(tc, apiv1.EventTypeWarning, "PDMemberMarkedAsFailure",
-		"%s(%s) marked as a failure member", podName, pdMember.ID)
+	msg := fmt.Sprintf("pd member[%s] is unhealthy", pdMember.ID)
+	pf.recorder.Event(tc, apiv1.EventTypeWarning, unHealthEventReason, fmt.Sprintf(unHealthEventMsgPattern, "pd", podName, msg))
 
 	tc.Status.PD.FailureMembers[podName] = v1alpha1.PDFailureMember{
 		PodName: podName,
diff --git a/pkg/manager/member/pd_failover_test.go b/pkg/manager/member/pd_failover_test.go
index cd9ca847d1..bbb4696a7d 100644
--- a/pkg/manager/member/pd_failover_test.go
+++ b/pkg/manager/member/pd_failover_test.go
@@ -275,7 +275,7 @@ func TestPDFailoverFailover(t *testing.T) {
 				events := collectEvents(recorder.Events)
 				g.Expect(events).To(HaveLen(2))
 				g.Expect(events[0]).To(ContainSubstring("test-pd-1(12891273174085095651) is unhealthy"))
-				g.Expect(events[1]).To(ContainSubstring("test-pd-1(12891273174085095651) marked as a failure member"))
+				g.Expect(events[1]).To(ContainSubstring("Unhealthy pd pod[test-pd-1] is unhealthy, msg:pd member[12891273174085095651] is unhealthy"))
 			},
 		},
 		{
diff --git a/pkg/manager/member/pd_scaler_test.go b/pkg/manager/member/pd_scaler_test.go
index 39b4ca7f6e..ecb4c90788 100644
--- a/pkg/manager/member/pd_scaler_test.go
+++ b/pkg/manager/member/pd_scaler_test.go
@@ -15,11 +15,11 @@ package member
 
 import (
 	"fmt"
-	"github.com/pingcap/kvproto/pkg/pdpb"
 	"testing"
 	"time"
 
 	. "github.com/onsi/gomega"
+	"github.com/pingcap/kvproto/pkg/pdpb"
 	"github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1"
 	"github.com/pingcap/tidb-operator/pkg/controller"
 	"github.com/pingcap/tidb-operator/pkg/label"
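For context on the changes above: the rendered form of the shared "Unhealthy" event can be sanity-checked in isolation. The following standalone sketch is not part of this change; it copies the unHealthEventReason/unHealthEventMsgPattern constants introduced in failover.go and plugs in the pod name and member ID used by the updated pd_failover_test.go expectation.

package main

import "fmt"

// Copies of the constants introduced in pkg/manager/member/failover.go.
const (
	unHealthEventReason     = "Unhealthy"
	unHealthEventMsgPattern = "%s pod[%s] is unhealthy, msg:%s"
)

func main() {
	// Values mirror the expectation in the updated pd_failover_test.go.
	podName := "test-pd-1"
	msg := fmt.Sprintf("pd member[%s] is unhealthy", "12891273174085095651")
	// Output: Unhealthy pd pod[test-pd-1] is unhealthy, msg:pd member[12891273174085095651] is unhealthy
	fmt.Println(unHealthEventReason, fmt.Sprintf(unHealthEventMsgPattern, "pd", podName, msg))
}

The pd_failover.go hunk composes exactly this reason/message pair when it marks a PD member as a failure member, which is why the test substring changes from "marked as a failure member" to the message above.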
"github.com/onsi/gomega" + "github.com/pingcap/kvproto/pkg/pdpb" "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1" "github.com/pingcap/tidb-operator/pkg/controller" "github.com/pingcap/tidb-operator/pkg/label" diff --git a/pkg/manager/member/tidb_failover.go b/pkg/manager/member/tidb_failover.go index fbd18dfb66..17df4353ee 100644 --- a/pkg/manager/member/tidb_failover.go +++ b/pkg/manager/member/tidb_failover.go @@ -14,21 +14,26 @@ package member import ( + "fmt" "time" "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1" + corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/tools/record" "k8s.io/klog" ) type tidbFailover struct { tidbFailoverPeriod time.Duration + recorder record.EventRecorder } // NewTiDBFailover returns a tidbFailover instance -func NewTiDBFailover(failoverPeriod time.Duration) Failover { +func NewTiDBFailover(failoverPeriod time.Duration, recorder record.EventRecorder) Failover { return &tidbFailover{ tidbFailoverPeriod: failoverPeriod, + recorder: recorder, } } @@ -59,6 +64,8 @@ func (tf *tidbFailover) Failover(tc *v1alpha1.TidbCluster) error { PodName: tidbMember.Name, CreatedAt: metav1.Now(), } + msg := fmt.Sprintf("tidb[%s] is unhealthy", tidbMember.Name) + tf.recorder.Event(tc, corev1.EventTypeWarning, unHealthEventReason, fmt.Sprintf(unHealthEventMsgPattern, "tidb", tidbMember.Name, msg)) break } } @@ -71,7 +78,8 @@ func (tf *tidbFailover) Recover(tc *v1alpha1.TidbCluster) { tc.Status.TiDB.FailureMembers = nil } -type fakeTiDBFailover struct{} +type fakeTiDBFailover struct { +} // NewFakeTiDBFailover returns a fake Failover func NewFakeTiDBFailover() Failover { diff --git a/pkg/manager/member/tidb_failover_test.go b/pkg/manager/member/tidb_failover_test.go index 4f14218be3..1b15420fa6 100644 --- a/pkg/manager/member/tidb_failover_test.go +++ b/pkg/manager/member/tidb_failover_test.go @@ -19,6 +19,7 @@ import ( . 
"github.com/onsi/gomega" "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/tools/record" "k8s.io/utils/pointer" "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1" @@ -382,7 +383,8 @@ func TestFakeTiDBFailoverRecover(t *testing.T) { } func newTiDBFailover() Failover { - return &tidbFailover{tidbFailoverPeriod: time.Duration(5 * time.Minute)} + recorder := record.NewFakeRecorder(100) + return &tidbFailover{tidbFailoverPeriod: time.Duration(5 * time.Minute), recorder: recorder} } func newTidbClusterForTiDBFailover() *v1alpha1.TidbCluster { diff --git a/pkg/manager/member/tikv_failover.go b/pkg/manager/member/tikv_failover.go index 59085b3640..036659a471 100644 --- a/pkg/manager/member/tikv_failover.go +++ b/pkg/manager/member/tikv_failover.go @@ -14,20 +14,24 @@ package member import ( + "fmt" "time" "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1" + corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/tools/record" "k8s.io/klog" ) type tikvFailover struct { tikvFailoverPeriod time.Duration + recorder record.EventRecorder } // NewTiKVFailover returns a tikv Failover -func NewTiKVFailover(tikvFailoverPeriod time.Duration) Failover { - return &tikvFailover{tikvFailoverPeriod} +func NewTiKVFailover(tikvFailoverPeriod time.Duration, recorder record.EventRecorder) Failover { + return &tikvFailover{tikvFailoverPeriod, recorder} } func (tf *tikvFailover) Failover(tc *v1alpha1.TidbCluster) error { @@ -62,6 +66,8 @@ func (tf *tikvFailover) Failover(tc *v1alpha1.TidbCluster) error { StoreID: store.ID, CreatedAt: metav1.Now(), } + msg := fmt.Sprintf("store[%s] is Down", store.ID) + tf.recorder.Event(tc, corev1.EventTypeWarning, unHealthEventReason, fmt.Sprintf(unHealthEventMsgPattern, "tikv", podName, msg)) } } } diff --git a/pkg/manager/member/tikv_failover_test.go b/pkg/manager/member/tikv_failover_test.go index adce3184a1..477d698a97 100644 --- a/pkg/manager/member/tikv_failover_test.go +++ b/pkg/manager/member/tikv_failover_test.go @@ -20,6 +20,7 @@ import ( . "github.com/onsi/gomega" "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/tools/record" "k8s.io/utils/pointer" ) @@ -304,5 +305,6 @@ func TestTiKVFailoverFailover(t *testing.T) { } func newFakeTiKVFailover() *tikvFailover { - return &tikvFailover{1 * time.Hour} + recorder := record.NewFakeRecorder(100) + return &tikvFailover{1 * time.Hour, recorder} }