-
Notifications
You must be signed in to change notification settings - Fork 21
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Feat: delete the sync deployment after deploy complete #78
Changes from 3 commits
8e2f178
a554418
e61caef
1cf1d79
9dcf3bc
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,16 +4,28 @@ import ( | |
"bytes" | ||
"context" | ||
"fmt" | ||
"strings" | ||
"time" | ||
|
||
"github.com/opencurve/curve-operator/pkg/daemon" | ||
"github.com/opencurve/curve-operator/pkg/k8sutil" | ||
"github.com/pkg/errors" | ||
apps "k8s.io/api/apps/v1" | ||
batch "k8s.io/api/batch/v1" | ||
v1 "k8s.io/api/core/v1" | ||
kerrors "k8s.io/apimachinery/pkg/api/errors" | ||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" | ||
"k8s.io/client-go/kubernetes/scheme" | ||
"k8s.io/client-go/tools/remotecommand" | ||
|
||
"github.com/opencurve/curve-operator/pkg/chunkserver" | ||
"github.com/opencurve/curve-operator/pkg/config" | ||
"github.com/opencurve/curve-operator/pkg/daemon" | ||
"github.com/opencurve/curve-operator/pkg/etcd" | ||
"github.com/opencurve/curve-operator/pkg/k8sutil" | ||
"github.com/opencurve/curve-operator/pkg/mds" | ||
"github.com/opencurve/curve-operator/pkg/metaserver" | ||
"github.com/opencurve/curve-operator/pkg/monitor" | ||
"github.com/opencurve/curve-operator/pkg/snapshotclone" | ||
"github.com/opencurve/curve-operator/pkg/topology" | ||
) | ||
|
||
const ( | ||
|
@@ -86,6 +98,10 @@ func createSyncDeployment(c *daemon.Cluster) error { | |
return err | ||
} | ||
} | ||
|
||
// delete the SyncConfigDeployment after the cluster is deployed. | ||
go deleteSyncConfigDeployment(c, newDeployment.GetName()) | ||
|
||
// update condition type and phase etc. | ||
return nil | ||
} | ||
|
@@ -154,3 +170,161 @@ func getReadConfigJobLabel(c *daemon.Cluster) map[string]string { | |
labels["curve"] = c.Kind | ||
return labels | ||
} | ||
|
||
// checkClusterDeployedInfo holds the per-component counters gathered during
// one pass over the Deployments and Jobs of a cluster namespace. Every
// counter starts at zero and is incremented once for each Deployment that is
// fully ready, or each Job that is completed, for the matching component.
type checkClusterDeployedInfo struct {
	// Core services.
	mdsReady, etcdReady int

	// Storage services.
	metaServerReady, chunkServerReady, snapShotCloneReady int

	// Monitoring stack.
	grafanaReady, prometheusReady, nodeExporterReady int

	// Provisioning jobs.
	jobPreChunkFileCompleted    int
	jobProLogicPoolCompleted    int
	jobProPhysicalPoolCompleted int
}
|
||
// deleteSyncConfigDeployment delete the SyncConfigDeployment after the cluster is deployed. | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think it would be better to put this func in the deployment file under the k8sutil folder. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. OK, I've moved it to that directory. |
||
func deleteSyncConfigDeployment(c *daemon.Cluster, syncConfigDeployment string) { | ||
|
||
wantChunkServer := len(c.Chunkserver.Devices) * len(c.Chunkserver.Nodes) | ||
nodeCount := len(c.Nodes) | ||
|
||
time.Sleep(1 * time.Minute) | ||
|
||
if c.Kind == config.KIND_CURVEBS { | ||
logger.Debugf("node count is %d, wanted chunk server count is %d", nodeCount, wantChunkServer) | ||
} else if c.Kind == config.KIND_CURVEFS { | ||
logger.Debugf("node count is %d", nodeCount) | ||
} | ||
|
||
checkTicker := time.NewTicker(30 * time.Second) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The for{} never stop if something wrong, you can add a timeout time or limit retry times. |
||
|
||
for { | ||
isAllReadyOrCompleted := true | ||
info := &checkClusterDeployedInfo{} | ||
deploymentList, err := c.Context.Clientset.AppsV1().Deployments(c.Namespace).List(metav1.ListOptions{}) | ||
if err != nil { | ||
logger.Errorf("failed to list deployment in namespace %s for delete curve-sync-config", c.Namespace) | ||
} | ||
|
||
jobs, err := c.Context.Clientset.BatchV1().Jobs(c.Namespace).List(metav1.ListOptions{}) | ||
if err != nil { | ||
logger.Errorf("failed to list jobs in namespace %s for delete curve-sync-config", c.Namespace) | ||
} | ||
|
||
for _, deploy := range deploymentList.Items { | ||
switch { | ||
case strings.HasPrefix(deploy.Name, etcd.AppName): | ||
if isAllReplicasReady(deploy) { | ||
info.etcdReady++ | ||
} | ||
case strings.HasPrefix(deploy.Name, mds.AppName): | ||
if isAllReplicasReady(deploy) { | ||
info.mdsReady++ | ||
} | ||
case strings.HasPrefix(deploy.Name, chunkserver.AppName): | ||
if isAllReplicasReady(deploy) { | ||
info.chunkServerReady++ | ||
} | ||
case strings.HasPrefix(deploy.Name, metaserver.AppName): | ||
if isAllReplicasReady(deploy) { | ||
info.metaServerReady++ | ||
} | ||
case strings.HasPrefix(deploy.Name, snapshotclone.AppName): | ||
if isAllReplicasReady(deploy) { | ||
info.snapShotCloneReady++ | ||
} | ||
case strings.HasPrefix(deploy.Name, monitor.GrafanaAppName): | ||
if isAllReplicasReady(deploy) { | ||
info.grafanaReady++ | ||
} | ||
case strings.HasPrefix(deploy.Name, monitor.PromAppName): | ||
if isAllReplicasReady(deploy) { | ||
info.prometheusReady++ | ||
} | ||
case strings.HasPrefix(deploy.Name, monitor.NodeExporterAppName): | ||
if isAllReplicasReady(deploy) { | ||
info.nodeExporterReady++ | ||
} | ||
} | ||
} | ||
|
||
for _, job := range jobs.Items { | ||
switch { | ||
case strings.HasPrefix(job.Name, topology.JOB_PYHSICAL_POOL): | ||
if isJobCompleted(job) { | ||
info.jobProPhysicalPoolCompleted++ | ||
} | ||
case strings.HasPrefix(job.Name, topology.JOB_LOGICAL_POOL): | ||
if isJobCompleted(job) { | ||
info.jobProLogicPoolCompleted++ | ||
} | ||
case strings.HasPrefix(job.Name, chunkserver.PrepareJobName): | ||
if isJobCompleted(job) { | ||
info.jobPreChunkFileCompleted++ | ||
} | ||
} | ||
} | ||
|
||
if c.SnapShotClone.Enable { | ||
if info.snapShotCloneReady != nodeCount { | ||
isAllReadyOrCompleted = false | ||
} | ||
} | ||
|
||
if c.Monitor.Enable { | ||
if info.grafanaReady == 0 || | ||
info.prometheusReady == 0 || | ||
info.nodeExporterReady != nodeCount { | ||
isAllReadyOrCompleted = false | ||
} | ||
} | ||
|
||
if c.Kind == config.KIND_CURVEBS && (info.chunkServerReady != wantChunkServer || | ||
info.jobPreChunkFileCompleted != wantChunkServer || | ||
info.jobProLogicPoolCompleted == 0 || | ||
info.jobProPhysicalPoolCompleted == 0) { | ||
isAllReadyOrCompleted = false | ||
} | ||
|
||
if c.Kind == config.KIND_CURVEFS && | ||
(info.metaServerReady != nodeCount || info.jobProLogicPoolCompleted == 0) { | ||
isAllReadyOrCompleted = false | ||
} | ||
|
||
if info.etcdReady != nodeCount || info.mdsReady != nodeCount { | ||
isAllReadyOrCompleted = false | ||
} | ||
|
||
if isAllReadyOrCompleted { | ||
break | ||
} | ||
<-checkTicker.C | ||
} | ||
|
||
err := c.Context.Clientset.AppsV1().Deployments(c.Namespace).Delete(syncConfigDeployment, &metav1.DeleteOptions{}) | ||
if err != nil { | ||
logger.Errorf("failed to delete deployment about \"curve-sync-config\", error: %s", err) | ||
} | ||
|
||
logger.Infof("cluster is deployed, deployment about \"curve-sync-config\" will be deleted") | ||
} | ||
|
||
func isAllReplicasReady(deployment apps.Deployment) bool { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The deployment operation and job operation code put the k8sutil dir is better. |
||
if deployment.Status.Replicas == deployment.Status.ReadyReplicas { | ||
return true | ||
} | ||
return false | ||
} | ||
|
||
func isJobCompleted(job batch.Job) bool { | ||
if *job.Spec.Completions == job.Status.Succeeded { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. the job status is failed or other status sometimes. |
||
return true | ||
} | ||
return false | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Use a channel to get the delete result (true or false), and log the reason if deleting the deployment fails.