Syncing latest changes from release-1.13 for rook #544

Merged
merged 4 commits on Dec 8, 2023
cmd/rook/ceph/osd.go: 1 addition & 5 deletions

@@ -326,12 +326,8 @@ func removeOSDs(cmd *cobra.Command, args []string) error {
 		return errors.Wrapf(err, "failed to parse --preserve-pvc flag")
 	}
 
-	exitIfNotSafe := false
-	forceRemovalCallback := func(x int) (bool, bool) {
-		return forceOSDRemovalBool, exitIfNotSafe
-	}
 	// Run OSD remove sequence
-	err = osddaemon.RemoveOSDs(context, &clusterInfo, strings.Split(osdIDsToRemove, ","), preservePVCBool, forceRemovalCallback)
+	err = osddaemon.RemoveOSDs(context, &clusterInfo, strings.Split(osdIDsToRemove, ","), preservePVCBool, forceOSDRemovalBool)
 	if err != nil {
 		rook.TerminateFatal(err)
 	}
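
For context, here is a minimal sketch of the call-site pattern after this change. The import paths, the --force-osd-removal flag name, and the strconv-based parsing are assumptions based on the surrounding code, not a copy of the real CLI wiring: the flag value is parsed to a bool and handed straight to osddaemon.RemoveOSDs, with no per-OSD callback.

package osdcli // illustrative package name, not part of the rook tree

import (
	"strconv"

	"github.com/pkg/errors"
	"github.com/rook/rook/pkg/clusterd"
	"github.com/rook/rook/pkg/daemon/ceph/client"
	osddaemon "github.com/rook/rook/pkg/daemon/ceph/osd"
)

// removeOSDsSketch mirrors the simplified call site: both string flags are
// parsed to booleans and passed directly to RemoveOSDs.
func removeOSDsSketch(ctx *clusterd.Context, clusterInfo *client.ClusterInfo, osdIDs []string, preservePVC, forceOSDRemoval string) error {
	preservePVCBool, err := strconv.ParseBool(preservePVC)
	if err != nil {
		return errors.Wrap(err, "failed to parse --preserve-pvc flag")
	}
	forceOSDRemovalBool, err := strconv.ParseBool(forceOSDRemoval)
	if err != nil {
		return errors.Wrap(err, "failed to parse --force-osd-removal flag")
	}
	// The force flag is now a plain bool; the old forceRemovalCallback wrapper is gone.
	return osddaemon.RemoveOSDs(ctx, clusterInfo, osdIDs, preservePVCBool, forceOSDRemovalBool)
}

Note that in the previous revision the CLI always set exitIfNotSafe to false, so from the command's point of view collapsing the callback into a single bool does not change behavior.
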
deploy/charts/rook-ceph/templates/clusterrole.yaml: 1 addition & 1 deletion

@@ -76,7 +76,6 @@ rules:
       # Node access is needed for determining nodes where mons should run
       - nodes
       - nodes/proxy
-      - services
       # Rook watches secrets which it uses to configure access to external resources.
       # e.g., external Ceph cluster or object store
       - secrets
@@ -96,6 +95,7 @@ rules:
       - persistentvolumeclaims
       # Rook creates endpoints for mgr and object store access
      - endpoints
+      - services
     verbs:
       - get
       - list
deploy/examples/common.yaml: 1 addition & 1 deletion

@@ -229,7 +229,6 @@ rules:
       # Node access is needed for determining nodes where mons should run
       - nodes
       - nodes/proxy
-      - services
       # Rook watches secrets which it uses to configure access to external resources.
       # e.g., external Ceph cluster or object store
       - secrets
@@ -249,6 +248,7 @@ rules:
       - persistentvolumeclaims
       # Rook creates endpoints for mgr and object store access
       - endpoints
+      - services
     verbs:
       - get
       - list
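
Both RBAC edits above (the Helm chart clusterrole and the example common.yaml) are the same change: the services resource moves out of the nodes block and into the rule that already grants get/list on endpoints and persistentvolumeclaims. As a rough illustration only, assuming an in-cluster config and the rook-ceph namespace, this is the kind of client-go call that rule authorizes for the service account the ClusterRole is bound to:

package main

import (
	"context"
	"fmt"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/rest"
)

func main() {
	// In-cluster credentials resolve to the pod's service account.
	cfg, err := rest.InClusterConfig()
	if err != nil {
		panic(err)
	}
	clientset, err := kubernetes.NewForConfig(cfg)
	if err != nil {
		panic(err)
	}
	// Listing services requires the "list" verb on the "services" resource.
	svcs, err := clientset.CoreV1().Services("rook-ceph").List(context.TODO(), metav1.ListOptions{})
	if err != nil {
		panic(err)
	}
	for _, svc := range svcs.Items {
		fmt.Println(svc.Name)
	}
}
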
pkg/daemon/ceph/osd/remove.go: 6 additions & 13 deletions

@@ -33,7 +33,7 @@ import (
 )
 
 // RemoveOSDs purges a list of OSDs from the cluster
-func RemoveOSDs(context *clusterd.Context, clusterInfo *client.ClusterInfo, osdsToRemove []string, preservePVC bool, forceRemovalCallback func(osdID int) (bool, bool)) error {
+func RemoveOSDs(context *clusterd.Context, clusterInfo *client.ClusterInfo, osdsToRemove []string, preservePVC, forceOSDRemoval bool) error {
 	// Generate the ceph config for running ceph commands similar to the operator
 	if err := client.WriteCephConfig(context, clusterInfo); err != nil {
 		return errors.Wrap(err, "failed to write the ceph config")
@@ -63,13 +63,13 @@ func RemoveOSDs(context *clusterd.Context, clusterInfo *client.ClusterInfo, osds
 			logger.Infof("osd.%d is marked 'DOWN'", osdID)
 		}
 
-		removeOSD(context, clusterInfo, osdID, preservePVC, forceRemovalCallback)
+		removeOSD(context, clusterInfo, osdID, preservePVC, forceOSDRemoval)
 	}
 
 	return nil
 }
 
-func removeOSD(clusterdContext *clusterd.Context, clusterInfo *client.ClusterInfo, osdID int, preservePVC bool, forceRemovalCallback func(osdID int) (bool, bool)) {
+func removeOSD(clusterdContext *clusterd.Context, clusterInfo *client.ClusterInfo, osdID int, preservePVC, forceOSDRemoval bool) {
 	// Get the host where the OSD is found
 	hostName, err := client.GetCrushHostName(clusterdContext, clusterInfo, osdID)
 	if err != nil {
@@ -83,21 +83,17 @@ func removeOSD(clusterdContext *clusterd.Context, clusterInfo *client.ClusterInf
 	if err != nil {
 		logger.Errorf("failed to exclude osd.%d out of the crush map. %v", osdID, err)
 	}
-	forceRemoval, exitIfNotSafe := forceRemovalCallback(osdID)
 
-	// Check we can remove the OSD
+	// Loop forever until the osd is safe-to-destroy
 	for {
 		isSafeToDestroy, err := client.OsdSafeToDestroy(clusterdContext, clusterInfo, osdID)
 		if err != nil {
 			// If we want to force remove the OSD and there was an error let's break outside of
 			// the loop and proceed with the OSD removal
 
-			if forceRemoval {
+			if forceOSDRemoval {
 				logger.Errorf("failed to check if osd %d is safe to destroy, but force removal is enabled so proceeding with removal. %v", osdID, err)
 				break
-			} else if exitIfNotSafe {
-				logger.Error("osd.%d is not safe to destroy")
-				return
 			} else {
 				logger.Errorf("failed to check if osd %d is safe to destroy, retrying in 1m. %v", osdID, err)
 				time.Sleep(1 * time.Minute)
@@ -111,12 +107,9 @@ func removeOSD(clusterdContext *clusterd.Context, clusterInfo *client.ClusterInf
 			break
 		} else {
 			// If we arrive here and forceOSDRemoval is true, we should proceed with the OSD removal
-			if forceRemoval {
+			if forceOSDRemoval {
 				logger.Infof("osd.%d is NOT ok to destroy but force removal is enabled so proceeding with removal", osdID)
 				break
-			} else if exitIfNotSafe {
-				logger.Error("osd.%d is not safe to destroy")
-				return
 			}
 			// Else we wait until the OSD can be removed
 			logger.Warningf("osd.%d is NOT ok to destroy, retrying in 1m until success", osdID)
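
With the exitIfNotSafe early-return path removed, remove.go is left with a single decision point: the forceOSDRemoval bool. A minimal sketch of that retry loop, omitting logging and assuming the rook import paths used here, looks like this:

package osdremoval // illustrative package, not part of the rook tree

import (
	"time"

	"github.com/rook/rook/pkg/clusterd"
	"github.com/rook/rook/pkg/daemon/ceph/client"
)

// waitUntilSafeToDestroy blocks until osd.<osdID> reports safe-to-destroy,
// or returns immediately when force removal is enabled.
func waitUntilSafeToDestroy(ctx *clusterd.Context, clusterInfo *client.ClusterInfo, osdID int, forceOSDRemoval bool) {
	for {
		safe, err := client.OsdSafeToDestroy(ctx, clusterInfo, osdID)
		if err == nil && safe {
			return // safe: proceed with removal
		}
		if forceOSDRemoval {
			return // not safe (or the check failed), but the caller opted into forced removal
		}
		// Neither safe nor forced: retry in a minute, as the loop in removeOSD does.
		time.Sleep(1 * time.Minute)
	}
}
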