Add more events for tidbcluster and autoscaler (#2150) (#2181)
Yisaer authored Apr 15, 2020
1 parent 71cbc2c commit 1f305d2
Showing 10 changed files with 48 additions and 14 deletions.
15 changes: 13 additions & 2 deletions pkg/autoscaler/autoscaler/autoscaler_manager.go
@@ -15,12 +15,14 @@ package autoscaler

import (
"fmt"
"strings"

"github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1"
"github.com/pingcap/tidb-operator/pkg/client/clientset/versioned"
informers "github.com/pingcap/tidb-operator/pkg/client/informers/externalversions"
v1alpha1listers "github.com/pingcap/tidb-operator/pkg/client/listers/pingcap/v1alpha1"
"github.com/pingcap/tidb-operator/pkg/controller"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
@@ -80,7 +82,7 @@ func (am *autoScalerManager) Sync(tac *v1alpha1.TidbClusterAutoScaler) error {
if err := am.syncAutoScaling(tc, tac); err != nil {
return err
}
- if err := am.syncTidbClusterReplicas(tc, oldTc); err != nil {
+ if err := am.syncTidbClusterReplicas(tac, tc, oldTc); err != nil {
return err
}
return am.syncAutoScalingStatus(tc, oldTc, tac)
@@ -102,7 +104,7 @@ func (am *autoScalerManager) syncAutoScaling(tc *v1alpha1.TidbCluster, tac *v1al
return nil
}

- func (am *autoScalerManager) syncTidbClusterReplicas(tc *v1alpha1.TidbCluster, oldTc *v1alpha1.TidbCluster) error {
+ func (am *autoScalerManager) syncTidbClusterReplicas(tac *v1alpha1.TidbClusterAutoScaler, tc *v1alpha1.TidbCluster, oldTc *v1alpha1.TidbCluster) error {
if tc.Spec.TiDB.Replicas == oldTc.Spec.TiDB.Replicas && tc.Spec.TiKV.Replicas == oldTc.Spec.TiKV.Replicas {
return nil
}
@@ -111,6 +113,15 @@ func (am *autoScalerManager) syncTidbClusterReplicas(tc *v1alpha1.TidbCluster, o
if err != nil {
return err
}
reason := fmt.Sprintf("Successful %s", strings.Title("auto-scaling"))
msg := ""
if tc.Spec.TiDB.Replicas != oldTc.Spec.TiDB.Replicas {
msg = fmt.Sprintf("%s auto-scaling tidb from %d to %d", msg, oldTc.Spec.TiDB.Replicas, tc.Spec.TiDB.Replicas)
}
if tc.Spec.TiKV.Replicas != oldTc.Spec.TiKV.Replicas {
msg = fmt.Sprintf("%s auto-scaling tikv from %d to %d", msg, oldTc.Spec.TiKV.Replicas, tc.Spec.TiKV.Replicas)
}
am.recorder.Event(tac, corev1.EventTypeNormal, reason, msg)
return nil
}

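For context, here is a standalone sketch (not part of the diff) of how the new event's reason and message strings come out: strings.Title("auto-scaling") title-cases each hyphen-separated word, and the message accumulates one clause per component whose replica count changed. The replica counts below are illustrative, not taken from the commit.

```go
package main

import (
	"fmt"
	"strings"
)

func main() {
	// Same pattern as syncTidbClusterReplicas: strings.Title("auto-scaling")
	// yields "Auto-Scaling", so the event reason is "Successful Auto-Scaling".
	reason := fmt.Sprintf("Successful %s", strings.Title("auto-scaling"))

	// The message starts empty and grows one clause per changed component.
	// 2 -> 3 is a made-up TiDB replica change used only for illustration.
	msg := ""
	oldReplicas, newReplicas := int32(2), int32(3)
	if newReplicas != oldReplicas {
		msg = fmt.Sprintf("%s auto-scaling tidb from %d to %d", msg, oldReplicas, newReplicas)
	}

	fmt.Println(reason) // Successful Auto-Scaling
	fmt.Println(msg)    // " auto-scaling tidb from 2 to 3" (leading space comes from the empty prefix)
}
```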
4 changes: 2 additions & 2 deletions pkg/controller/tidbcluster/tidb_cluster_controller.go
@@ -109,9 +109,9 @@ func NewController(
tikvScaler := mm.NewTiKVScaler(pdControl, pvcInformer.Lister(), pvcControl, podInformer.Lister())
tiflashScaler := mm.NewTiFlashScaler(pdControl, pvcInformer.Lister(), pvcControl, podInformer.Lister())
pdFailover := mm.NewPDFailover(cli, pdControl, pdFailoverPeriod, podInformer.Lister(), podControl, pvcInformer.Lister(), pvcControl, pvInformer.Lister(), recorder)
- tikvFailover := mm.NewTiKVFailover(tikvFailoverPeriod)
+ tikvFailover := mm.NewTiKVFailover(tikvFailoverPeriod, recorder)
+ tidbFailover := mm.NewTiDBFailover(tidbFailoverPeriod, recorder)
tiflashFailover := mm.NewTiFlashFailover(tiflashFailoverPeriod)
- tidbFailover := mm.NewTiDBFailover(tidbFailoverPeriod)
pdUpgrader := mm.NewPDUpgrader(pdControl, podControl, podInformer.Lister())
tikvUpgrader := mm.NewTiKVUpgrader(pdControl, podControl, podInformer.Lister())
tiflashUpgrader := mm.NewTiFlashUpgrader(pdControl, podControl, podInformer.Lister())
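NewController already receives a recorder and now simply threads it into the TiKV and TiDB failover constructors. As background (not part of this commit), an EventRecorder like the one passed around here is typically built with client-go's event broadcaster; the helper name and component parameter below are illustrative.

```go
package example

import (
	corev1 "k8s.io/api/core/v1"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/kubernetes/scheme"
	typedcorev1 "k8s.io/client-go/kubernetes/typed/core/v1"
	"k8s.io/client-go/tools/record"
	"k8s.io/klog"
)

// newEventRecorder builds a recorder that logs events through klog and also
// writes them to the cluster via the core/v1 Events API.
func newEventRecorder(kubeCli kubernetes.Interface, component string) record.EventRecorder {
	broadcaster := record.NewBroadcaster()
	broadcaster.StartLogging(klog.Infof)
	broadcaster.StartRecordingToSink(&typedcorev1.EventSinkImpl{
		Interface: kubeCli.CoreV1().Events(""),
	})
	return broadcaster.NewRecorder(scheme.Scheme, corev1.EventSource{Component: component})
}
```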
5 changes: 5 additions & 0 deletions pkg/manager/member/failover.go
@@ -15,6 +15,11 @@ package member

import "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1"

const (
unHealthEventReason = "Unhealthy"
unHealthEventMsgPattern = "%s pod[%s] is unhealthy, msg:%s"
)

// Failover implements the logic for pd/tikv/tidb's failover and recovery.
type Failover interface {
Failover(*v1alpha1.TidbCluster) error
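These two constants are shared by the pd/tidb/tikv failover changes below. A quick sketch of what the rendered event text looks like; the pod name and store ID are made-up examples.

```go
package main

import "fmt"

const (
	unHealthEventReason     = "Unhealthy"
	unHealthEventMsgPattern = "%s pod[%s] is unhealthy, msg:%s"
)

func main() {
	// Hypothetical values: a TiKV pod "demo-tikv-1" whose store 4 is reported Down.
	msg := fmt.Sprintf("store[%s] is Down", "4")
	fmt.Println(unHealthEventReason)
	fmt.Println(fmt.Sprintf(unHealthEventMsgPattern, "tikv", "demo-tikv-1", msg))
	// Output:
	// Unhealthy
	// tikv pod[demo-tikv-1] is unhealthy, msg:store[4] is Down
}
```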
4 changes: 2 additions & 2 deletions pkg/manager/member/pd_failover.go
@@ -140,8 +140,8 @@ func (pf *pdFailover) tryToMarkAPeerAsFailure(tc *v1alpha1.TidbCluster) error {
return err
}

- pf.recorder.Eventf(tc, apiv1.EventTypeWarning, "PDMemberMarkedAsFailure",
- "%s(%s) marked as a failure member", podName, pdMember.ID)
+ msg := fmt.Sprintf("pd member[%s] is unhealthy", pdMember.ID)
+ pf.recorder.Event(tc, apiv1.EventTypeWarning, unHealthEventReason, fmt.Sprintf(unHealthEventMsgPattern, "pd", podName, msg))

tc.Status.PD.FailureMembers[podName] = v1alpha1.PDFailureMember{
PodName: podName,
2 changes: 1 addition & 1 deletion pkg/manager/member/pd_failover_test.go
@@ -275,7 +275,7 @@ func TestPDFailoverFailover(t *testing.T) {
events := collectEvents(recorder.Events)
g.Expect(events).To(HaveLen(2))
g.Expect(events[0]).To(ContainSubstring("test-pd-1(12891273174085095651) is unhealthy"))
- g.Expect(events[1]).To(ContainSubstring("test-pd-1(12891273174085095651) marked as a failure member"))
+ g.Expect(events[1]).To(ContainSubstring("Unhealthy pd pod[test-pd-1] is unhealthy, msg:pd member[12891273174085095651] is unhealthy"))
},
},
{
2 changes: 1 addition & 1 deletion pkg/manager/member/pd_scaler_test.go
@@ -15,11 +15,11 @@ package member

import (
"fmt"
"github.com/pingcap/kvproto/pkg/pdpb"
"testing"
"time"

. "github.com/onsi/gomega"
"github.com/pingcap/kvproto/pkg/pdpb"
"github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1"
"github.com/pingcap/tidb-operator/pkg/controller"
"github.com/pingcap/tidb-operator/pkg/label"
12 changes: 10 additions & 2 deletions pkg/manager/member/tidb_failover.go
@@ -14,21 +14,26 @@
package member

import (
"fmt"
"time"

"github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/tools/record"
"k8s.io/klog"
)

type tidbFailover struct {
tidbFailoverPeriod time.Duration
recorder record.EventRecorder
}

// NewTiDBFailover returns a tidbFailover instance
- func NewTiDBFailover(failoverPeriod time.Duration) Failover {
+ func NewTiDBFailover(failoverPeriod time.Duration, recorder record.EventRecorder) Failover {
return &tidbFailover{
tidbFailoverPeriod: failoverPeriod,
recorder: recorder,
}
}

@@ -59,6 +64,8 @@ func (tf *tidbFailover) Failover(tc *v1alpha1.TidbCluster) error {
PodName: tidbMember.Name,
CreatedAt: metav1.Now(),
}
msg := fmt.Sprintf("tidb[%s] is unhealthy", tidbMember.Name)
tf.recorder.Event(tc, corev1.EventTypeWarning, unHealthEventReason, fmt.Sprintf(unHealthEventMsgPattern, "tidb", tidbMember.Name, msg))
break
}
}
@@ -71,7 +78,8 @@ func (tf *tidbFailover) Recover(tc *v1alpha1.TidbCluster) {
tc.Status.TiDB.FailureMembers = nil
}

- type fakeTiDBFailover struct{}
+ type fakeTiDBFailover struct {
+ }

// NewFakeTiDBFailover returns a fake Failover
func NewFakeTiDBFailover() Failover {
4 changes: 3 additions & 1 deletion pkg/manager/member/tidb_failover_test.go
@@ -19,6 +19,7 @@ import (

. "github.com/onsi/gomega"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/tools/record"
"k8s.io/utils/pointer"

"github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1"
@@ -382,7 +383,8 @@ func TestFakeTiDBFailoverRecover(t *testing.T) {
}

func newTiDBFailover() Failover {
- return &tidbFailover{tidbFailoverPeriod: time.Duration(5 * time.Minute)}
+ recorder := record.NewFakeRecorder(100)
+ return &tidbFailover{tidbFailoverPeriod: time.Duration(5 * time.Minute), recorder: recorder}
}

func newTidbClusterForTiDBFailover() *v1alpha1.TidbCluster {
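The tests now construct the failover objects with record.NewFakeRecorder. Below is a minimal, self-contained sketch of the assertion pattern this enables (the operator's own tests use a collectEvents helper for the same purpose); the pod name and message are hypothetical.

```go
package example

import (
	"strings"
	"testing"

	corev1 "k8s.io/api/core/v1"
	"k8s.io/client-go/tools/record"
)

// TestFakeRecorderEvents records one warning and reads the formatted
// "<type> <reason> <message>" string off the FakeRecorder's Events channel.
func TestFakeRecorderEvents(t *testing.T) {
	recorder := record.NewFakeRecorder(10)
	obj := &corev1.Pod{} // FakeRecorder ignores the object; any runtime.Object works

	// "demo-tidb-0" is an illustrative pod name, not taken from the commit.
	recorder.Event(obj, corev1.EventTypeWarning, "Unhealthy",
		"tidb pod[demo-tidb-0] is unhealthy, msg:tidb[demo-tidb-0] is unhealthy")

	got := <-recorder.Events
	if !strings.Contains(got, "Warning Unhealthy") {
		t.Fatalf("unexpected event: %q", got)
	}
}
```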
10 changes: 8 additions & 2 deletions pkg/manager/member/tikv_failover.go
@@ -14,20 +14,24 @@
package member

import (
"fmt"
"time"

"github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/tools/record"
"k8s.io/klog"
)

type tikvFailover struct {
tikvFailoverPeriod time.Duration
recorder record.EventRecorder
}

// NewTiKVFailover returns a tikv Failover
- func NewTiKVFailover(tikvFailoverPeriod time.Duration) Failover {
- return &tikvFailover{tikvFailoverPeriod}
+ func NewTiKVFailover(tikvFailoverPeriod time.Duration, recorder record.EventRecorder) Failover {
+ return &tikvFailover{tikvFailoverPeriod, recorder}
}

func (tf *tikvFailover) Failover(tc *v1alpha1.TidbCluster) error {
@@ -62,6 +66,8 @@ func (tf *tikvFailover) Failover(tc *v1alpha1.TidbCluster) error {
StoreID: store.ID,
CreatedAt: metav1.Now(),
}
msg := fmt.Sprintf("store[%s] is Down", store.ID)
tf.recorder.Event(tc, corev1.EventTypeWarning, unHealthEventReason, fmt.Sprintf(unHealthEventMsgPattern, "tikv", podName, msg))
}
}
}
4 changes: 3 additions & 1 deletion pkg/manager/member/tikv_failover_test.go
@@ -20,6 +20,7 @@ import (
. "github.com/onsi/gomega"
"github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/tools/record"
"k8s.io/utils/pointer"
)

@@ -304,5 +305,6 @@ func TestTiKVFailoverFailover(t *testing.T) {
}

func newFakeTiKVFailover() *tikvFailover {
- return &tikvFailover{1 * time.Hour}
+ recorder := record.NewFakeRecorder(100)
+ return &tikvFailover{1 * time.Hour, recorder}
}
