Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat: delete the sync deployment after deploy complete #78

Merged
merged 5 commits into from
Sep 12, 2023
Merged
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
178 changes: 176 additions & 2 deletions pkg/controllers/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,28 @@ import (
"bytes"
"context"
"fmt"
"strings"
"time"

"github.com/opencurve/curve-operator/pkg/daemon"
"github.com/opencurve/curve-operator/pkg/k8sutil"
"github.com/pkg/errors"
apps "k8s.io/api/apps/v1"
batch "k8s.io/api/batch/v1"
v1 "k8s.io/api/core/v1"
kerrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/kubernetes/scheme"
"k8s.io/client-go/tools/remotecommand"

"github.com/opencurve/curve-operator/pkg/chunkserver"
"github.com/opencurve/curve-operator/pkg/config"
"github.com/opencurve/curve-operator/pkg/daemon"
"github.com/opencurve/curve-operator/pkg/etcd"
"github.com/opencurve/curve-operator/pkg/k8sutil"
"github.com/opencurve/curve-operator/pkg/mds"
"github.com/opencurve/curve-operator/pkg/metaserver"
"github.com/opencurve/curve-operator/pkg/monitor"
"github.com/opencurve/curve-operator/pkg/snapshotclone"
"github.com/opencurve/curve-operator/pkg/topology"
)

const (
Expand Down Expand Up @@ -86,6 +98,10 @@ func createSyncDeployment(c *daemon.Cluster) error {
return err
}
}

// delete the SyncConfigDeployment after the cluster is deployed.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use a channel to get the delete result (true or false), and log the reason if deleting the deployment fails.

go deleteSyncConfigDeployment(c, newDeployment.GetName())

// update condition type and phase etc.
return nil
}
Expand Down Expand Up @@ -154,3 +170,161 @@ func getReadConfigJobLabel(c *daemon.Cluster) map[string]string {
labels["curve"] = c.Kind
return labels
}

// checkClusterDeployedInfo holds per-component tallies gathered while checking
// whether the cluster has finished deploying: the *Ready fields count
// deployments found ready and the job*Completed fields count jobs found
// completed.
type checkClusterDeployedInfo struct {
	// etcd and MDS deployments.
	etcdReady, mdsReady int

	// Chunk server, meta server and snapshot clone deployments.
	chunkServerReady, metaServerReady, snapShotCloneReady int

	// Grafana, Prometheus and node exporter deployments.
	grafanaReady, prometheusReady, nodeExporterReady int

	// Chunk-file preparation, logical pool and physical pool jobs.
	jobPreChunkFileCompleted, jobProLogicPoolCompleted, jobProPhysicalPoolCompleted int
}

// deleteSyncConfigDeployment deletes the SyncConfigDeployment after the cluster is deployed.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it would be better to put this func in the k8sutil folder, in the deployment file.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok, I've adjusted its directory

func deleteSyncConfigDeployment(c *daemon.Cluster, syncConfigDeployment string) {

wantChunkServer := len(c.Chunkserver.Devices) * len(c.Chunkserver.Nodes)
nodeCount := len(c.Nodes)

time.Sleep(1 * time.Minute)

if c.Kind == config.KIND_CURVEBS {
logger.Debugf("node count is %d, wanted chunk server count is %d", nodeCount, wantChunkServer)
} else if c.Kind == config.KIND_CURVEFS {
logger.Debugf("node count is %d", nodeCount)
}

checkTicker := time.NewTicker(30 * time.Second)
Copy link
Collaborator

@caoxianfei1 caoxianfei1 Aug 11, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The for{} never stop if something wrong, you can add a timeout time or limit retry times.


for {
isAllReadyOrCompleted := true
info := &checkClusterDeployedInfo{}
deploymentList, err := c.Context.Clientset.AppsV1().Deployments(c.Namespace).List(metav1.ListOptions{})
if err != nil {
logger.Errorf("failed to list deployment in namespace %s for delete curve-sync-config", c.Namespace)
}

jobs, err := c.Context.Clientset.BatchV1().Jobs(c.Namespace).List(metav1.ListOptions{})
if err != nil {
logger.Errorf("failed to list jobs in namespace %s for delete curve-sync-config", c.Namespace)
}

for _, deploy := range deploymentList.Items {
switch {
case strings.HasPrefix(deploy.Name, etcd.AppName):
if isAllReplicasReady(deploy) {
info.etcdReady++
}
case strings.HasPrefix(deploy.Name, mds.AppName):
if isAllReplicasReady(deploy) {
info.mdsReady++
}
case strings.HasPrefix(deploy.Name, chunkserver.AppName):
if isAllReplicasReady(deploy) {
info.chunkServerReady++
}
case strings.HasPrefix(deploy.Name, metaserver.AppName):
if isAllReplicasReady(deploy) {
info.metaServerReady++
}
case strings.HasPrefix(deploy.Name, snapshotclone.AppName):
if isAllReplicasReady(deploy) {
info.snapShotCloneReady++
}
case strings.HasPrefix(deploy.Name, monitor.GrafanaAppName):
if isAllReplicasReady(deploy) {
info.grafanaReady++
}
case strings.HasPrefix(deploy.Name, monitor.PromAppName):
if isAllReplicasReady(deploy) {
info.prometheusReady++
}
case strings.HasPrefix(deploy.Name, monitor.NodeExporterAppName):
if isAllReplicasReady(deploy) {
info.nodeExporterReady++
}
}
}

for _, job := range jobs.Items {
switch {
case strings.HasPrefix(job.Name, topology.JOB_PYHSICAL_POOL):
if isJobCompleted(job) {
info.jobProPhysicalPoolCompleted++
}
case strings.HasPrefix(job.Name, topology.JOB_LOGICAL_POOL):
if isJobCompleted(job) {
info.jobProLogicPoolCompleted++
}
case strings.HasPrefix(job.Name, chunkserver.PrepareJobName):
if isJobCompleted(job) {
info.jobPreChunkFileCompleted++
}
}
}

if c.SnapShotClone.Enable {
if info.snapShotCloneReady != nodeCount {
isAllReadyOrCompleted = false
}
}

if c.Monitor.Enable {
if info.grafanaReady == 0 ||
info.prometheusReady == 0 ||
info.nodeExporterReady != nodeCount {
isAllReadyOrCompleted = false
}
}

if c.Kind == config.KIND_CURVEBS && (info.chunkServerReady != wantChunkServer ||
info.jobPreChunkFileCompleted != wantChunkServer ||
info.jobProLogicPoolCompleted == 0 ||
info.jobProPhysicalPoolCompleted == 0) {
isAllReadyOrCompleted = false
}

if c.Kind == config.KIND_CURVEFS &&
(info.metaServerReady != nodeCount || info.jobProLogicPoolCompleted == 0) {
isAllReadyOrCompleted = false
}

if info.etcdReady != nodeCount || info.mdsReady != nodeCount {
isAllReadyOrCompleted = false
}

if isAllReadyOrCompleted {
break
}
<-checkTicker.C
}

err := c.Context.Clientset.AppsV1().Deployments(c.Namespace).Delete(syncConfigDeployment, &metav1.DeleteOptions{})
if err != nil {
logger.Errorf("failed to delete deployment about \"curve-sync-config\", error: %s", err)
}

logger.Infof("cluster is deployed, deployment about \"curve-sync-config\" will be deleted")
}

func isAllReplicasReady(deployment apps.Deployment) bool {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The deployment operation and job operation code put the k8sutil dir is better.

if deployment.Status.Replicas == deployment.Status.ReadyReplicas {
return true
}
return false
}

func isJobCompleted(job batch.Job) bool {
if *job.Spec.Completions == job.Status.Succeeded {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the job status is failed or other status sometimes.

return true
}
return false
}