Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

dm-operator/: support scaling a dm cluster with dm-masters and dm-workers #3186

Merged
merged 60 commits into from
Sep 2, 2020
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
Show all changes
60 commits
Select commit Hold shift + click to select a range
08a8ae4
add dmclusters CRD
lichunzhu Aug 5, 2020
5cb82ac
resolve conflicts
lichunzhu Aug 5, 2020
bcf58ea
Merge branch 'master' into defineDMSpec
lichunzhu Aug 5, 2020
55a6365
address comment
lichunzhu Aug 6, 2020
0aae8c0
Merge branch 'master' into defineDMSpec
lichunzhu Aug 6, 2020
f146be1
address comments
lichunzhu Aug 6, 2020
94c427b
Merge branch 'defineDMSpec' of https://github.com/lichunzhu/tidb-oper…
lichunzhu Aug 6, 2020
4f10a51
delete monitor ref
lichunzhu Aug 6, 2020
a020492
generate dmcluster client
lichunzhu Aug 6, 2020
3edfaa2
address comments
lichunzhu Aug 6, 2020
74aca39
Merge branch 'master' into defineDMSpec
lichunzhu Aug 7, 2020
2c1bec5
address comment
lichunzhu Aug 7, 2020
fbe26f3
tmp commit
lichunzhu Aug 7, 2020
d85a9fc
resolve conflict
lichunzhu Aug 7, 2020
a9da15f
merge master
lichunzhu Aug 11, 2020
ba0f518
remove dm package
lichunzhu Aug 12, 2020
7a51c07
fix bugs
lichunzhu Aug 12, 2020
3ec6c86
fix bug
lichunzhu Aug 12, 2020
8122c71
support start dm-master and dm-worker in cluster
lichunzhu Aug 18, 2020
a38bb0e
fix some bugs
lichunzhu Aug 19, 2020
1e7efd8
merge master branch and resolve conflicts
lichunzhu Aug 19, 2020
850035c
fix ut
lichunzhu Aug 20, 2020
cd3f5b4
fix dm-master start
lichunzhu Aug 24, 2020
f4a641d
fix dm-worker start bug
lichunzhu Aug 24, 2020
5a51cbe
Merge branch 'master' of https://github.com/pingcap/tidb-operator int…
lichunzhu Aug 25, 2020
be4fd5e
add more column info
lichunzhu Aug 25, 2020
1e4f2ac
Merge branch 'master' of https://github.com/pingcap/tidb-operator int…
lichunzhu Aug 25, 2020
daf81f7
Merge branch 'master' into supportStartDMCluster
lichunzhu Aug 25, 2020
f7c70bd
fix ut
lichunzhu Aug 25, 2020
08f4695
fix verify
lichunzhu Aug 25, 2020
e2249f9
fix verify again
lichunzhu Aug 25, 2020
161b862
address comments
lichunzhu Aug 25, 2020
35a69d3
Merge branch 'master' into supportStartDMCluster
lichunzhu Aug 25, 2020
d3da808
regenerate code
lichunzhu Aug 25, 2020
8e52fbb
address comments
lichunzhu Aug 25, 2020
cefe7b8
Merge branch 'master' into supportStartDMCluster
lichunzhu Aug 25, 2020
f1b6029
fix import cycle problem
lichunzhu Aug 26, 2020
7726669
Merge branch 'supportStartDMCluster' of https://github.com/lichunzhu/…
lichunzhu Aug 26, 2020
84a6309
fix check
lichunzhu Aug 26, 2020
5ab015d
address comments
lichunzhu Aug 26, 2020
c33746e
support graceful upgrade for dm-master
lichunzhu Aug 26, 2020
b998616
merge master
lichunzhu Aug 26, 2020
f23a336
address comments
lichunzhu Aug 27, 2020
7282732
Merge branch 'master' into supportUpgradeDMCluster
lichunzhu Aug 27, 2020
cdc031a
add dm-master scaler
lichunzhu Aug 28, 2020
bb94d5e
tmp
lichunzhu Aug 28, 2020
d5cfc31
support scale dm cluster
lichunzhu Aug 31, 2020
c3d3592
merge master and resolve conflicts
lichunzhu Aug 31, 2020
bf0db90
let scaling take precedence over upgrading
lichunzhu Aug 31, 2020
f6e5a55
fix ut
lichunzhu Aug 31, 2020
e9e6032
Merge branch 'master' into supportScaleDMCluster
lichunzhu Sep 1, 2020
38db6b3
Merge branch 'master' into supportScaleDMCluster
lichunzhu Sep 1, 2020
57803d5
address comments
lichunzhu Sep 1, 2020
aa4903b
Merge branch 'supportScaleDMCluster' of https://github.com/lichunzhu/…
lichunzhu Sep 1, 2020
57e925d
Merge branch 'master' into supportScaleDMCluster
lichunzhu Sep 2, 2020
4b2deec
address comment
lichunzhu Sep 2, 2020
979fd98
address comment
lichunzhu Sep 2, 2020
ca4048c
Merge branch 'master' into supportScaleDMCluster
lichunzhu Sep 2, 2020
ead903f
address comment
lichunzhu Sep 2, 2020
12d8cba
Merge branch 'supportScaleDMCluster' of https://github.com/lichunzhu/…
lichunzhu Sep 2, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 43 additions & 0 deletions pkg/apis/pingcap/v1alpha1/dmcluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,13 @@
package v1alpha1

import (
"encoding/json"
"fmt"
"strings"

"github.com/pingcap/advanced-statefulset/client/apis/apps/v1/helper"
"github.com/pingcap/tidb-operator/pkg/label"
"k8s.io/apimachinery/pkg/util/sets"
)

func (dc *DMCluster) Scheme() string {
Expand Down Expand Up @@ -92,6 +97,17 @@ func (dc *DMCluster) WorkerStsDesiredReplicas() int32 {
return dc.Spec.Worker.Replicas
}

// WorkerStsDesiredOrdinals returns the set of pod ordinals desired for the
// dm-worker StatefulSet.
//
// When excludeFailover is true the replica count is taken directly from
// dc.Spec.Worker.Replicas; otherwise it is taken from
// dc.WorkerStsDesiredReplicas(). The replica count and the delete slots read
// from the dm-worker annotation are handed to
// helper.GetPodOrdinalsFromReplicasAndDeleteSlots to build the result.
// An empty set is returned when no worker spec is configured.
func (dc *DMCluster) WorkerStsDesiredOrdinals(excludeFailover bool) sets.Int32 {
	workerSpec := dc.Spec.Worker
	if workerSpec == nil {
		return sets.Int32{}
	}

	var desired int32
	if excludeFailover {
		desired = workerSpec.Replicas
	} else {
		desired = dc.WorkerStsDesiredReplicas()
	}

	slots := dc.getDeleteSlots(label.DMWorkerLabelVal)
	return helper.GetPodOrdinalsFromReplicasAndDeleteSlots(desired, slots)
}

// GetInstanceName returns the instance name of this DMCluster, which is the
// value of dc.Name.
func (dc *DMCluster) GetInstanceName() string {
return dc.Name
}
Expand Down Expand Up @@ -138,6 +154,33 @@ func (dc *DMCluster) MasterScaling() bool {
return dc.Status.Master.Phase == ScalePhase
}

// getDeleteSlots returns the ordinals listed in the delete-slots annotation of
// the given component.
//
// component selects the annotation key: label.DMMasterLabelVal maps to
// label.AnnDMMasterDeleteSlots and label.DMWorkerLabelVal maps to
// label.AnnDMWorkerDeleteSlots. The annotation value is decoded as a JSON
// array of int32. An empty (non-nil) set is returned when the object has no
// annotations, the component is unknown, the annotation is absent, or its
// value cannot be decoded.
func (dc *DMCluster) getDeleteSlots(component string) (deleteSlots sets.Int32) {
	deleteSlots = sets.NewInt32()

	anns := dc.GetAnnotations()
	if anns == nil {
		return deleteSlots
	}

	var annKey string
	switch component {
	case label.DMMasterLabelVal:
		annKey = label.AnnDMMasterDeleteSlots
	case label.DMWorkerLabelVal:
		annKey = label.AnnDMWorkerDeleteSlots
	default:
		return deleteSlots
	}

	raw, found := anns[annKey]
	if !found {
		return deleteSlots
	}

	var ordinals []int32
	if err := json.Unmarshal([]byte(raw), &ordinals); err != nil {
		// A malformed annotation is deliberately treated as "no delete slots".
		return deleteSlots
	}
	deleteSlots.Insert(ordinals...)
	return deleteSlots
}

func (dc *DMCluster) MasterIsAvailable() bool {
lowerLimit := dc.Spec.Master.Replicas/2 + 1
if int32(len(dc.Status.Master.Members)) < lowerLimit {
Expand Down
15 changes: 11 additions & 4 deletions pkg/manager/member/dm_worker_member_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -269,10 +269,7 @@ func (wmm *workerMemberManager) syncDMClusterStatus(dc *v1alpha1.DMCluster, set

// offline the workers that have already been scaled in
if status.Stage == "offline" {
ordinal, err := util.GetOrdinalFromPodName(worker.Name)
if err != nil {
klog.Errorf("invalid worker name %s, can't offline this worker automatically, err: %s", worker.Name, err)
} else if ordinal >= dc.WorkerStsDesiredReplicas() {
if !isWorkerPodDesired(dc, name) {
err := dmClient.DeleteWorker(name)
DanielZhangQD marked this conversation as resolved.
Show resolved Hide resolved
if err != nil {
klog.Errorf("fail to remove worker %s, err: %s", worker.Name, err)
Expand Down Expand Up @@ -549,3 +546,13 @@ func getWorkerConfigMap(dc *v1alpha1.DMCluster) (*corev1.ConfigMap, error) {
}
return cm, nil
}

func isWorkerPodDesired(dc *v1alpha1.DMCluster, podName string) bool {
ordinals := dc.WorkerStsDesiredOrdinals(true)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should be false here?

ordinal, err := util.GetOrdinalFromPodName(podName)
if err != nil {
klog.Errorf("unexpected pod name %q: %v", podName, err)
return false
}
return ordinals.Has(ordinal)
}
5 changes: 4 additions & 1 deletion pkg/manager/member/dm_worker_scaler.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,10 @@ func (wsd *workerScaler) ScaleOut(meta metav1.Object, oldSet *apps.StatefulSet,
return nil
}

// We need remove member from cluster before reducing statefulset replicas
// Unlike dm-master, we need to remove the member from the cluster **after** reducing the statefulset replicas.
// It is now removed while syncing the worker status. For dm-worker, we can't remove its registration info from dm-master
// while it's still alive, so we delete it later, after its keepalive lease has expired or been revoked.
// We can defer deleting the dm-worker registration info because dm-master will patch replication tasks through keepalive info.
// only remove one member at a time when scale down
func (wsd *workerScaler) ScaleIn(meta metav1.Object, oldSet *apps.StatefulSet, newSet *apps.StatefulSet) error {
dc, ok := meta.(*v1alpha1.DMCluster)
Expand Down