From 90ed33ca3001ca74145ae0a06cdfc818a98acccd Mon Sep 17 00:00:00 2001 From: Madhu Rajanna Date: Thu, 18 Apr 2024 14:02:29 +0200 Subject: [PATCH] util: add support to configure mirror daemon count Currently we assume that only one rbd mirror daemon is running on the ceph cluster, but that is not true in many cases and there can be more than one; this PR makes the count a configurable parameter. fixes: #4312 Signed-off-by: Madhu Rajanna --- charts/ceph-csi-rbd/values.yaml | 1 + deploy/csi-config-map-sample.yaml | 3 + internal/rbd/rbd_util.go | 6 +- internal/util/csiconfig.go | 17 ++++ internal/util/csiconfig_test.go | 83 +++++++++++++++++++ .../api/deploy/kubernetes/csi-config-map.go | 2 + 6 files changed, 111 insertions(+), 1 deletion(-) diff --git a/charts/ceph-csi-rbd/values.yaml b/charts/ceph-csi-rbd/values.yaml index 0a8195e7402c..a3821ff7bb1e 100644 --- a/charts/ceph-csi-rbd/values.yaml +++ b/charts/ceph-csi-rbd/values.yaml @@ -27,6 +27,7 @@ serviceAccounts: # - "" # rbd: # netNamespaceFilePath: "{{ .kubeletDir }}/plugins/{{ .driverName }}/net" +# mirrorDaemonCount: 1 # readAffinity: # enabled: true # crushLocationLabels: diff --git a/deploy/csi-config-map-sample.yaml b/deploy/csi-config-map-sample.yaml index d44f96905060..e0263a0d8c9d 100644 --- a/deploy/csi-config-map-sample.yaml +++ b/deploy/csi-config-map-sample.yaml @@ -19,6 +19,8 @@ kind: ConfigMap # NOTE: The given radosNamespace must already exists in the pool. # NOTE: Make sure you don't add radosNamespace option to a currently in use # configuration as it will cause issues. +# The "rbd.mirrorDaemonCount" is optional and represents the total number of +# RBD mirror daemons running on the ceph cluster. # The field "cephFS.subvolumeGroup" is optional and defaults to "csi". # NOTE: The given subvolumeGroup must already exist in the filesystem. 
# The "cephFS.netNamespaceFilePath" fields are the various network namespace @@ -64,6 +66,7 @@ data: "rbd": { "netNamespaceFilePath": "/plugins/rbd.csi.ceph.com/net", "radosNamespace": "", + "mirrorDaemonCount": 1, }, "monitors": [ "", diff --git a/internal/rbd/rbd_util.go b/internal/rbd/rbd_util.go index e3852893295e..fa3f68a7749f 100644 --- a/internal/rbd/rbd_util.go +++ b/internal/rbd/rbd_util.go @@ -553,9 +553,13 @@ func (ri *rbdImage) isInUse() (bool, error) { // because we opened the image, there is at least one watcher defaultWatchers := 1 if mirrorInfo.Primary { + count, err := util.GetRBDMirrorDaemonCount(util.CsiConfigFile, ri.ClusterID) + if err != nil { + return false, err + } // if rbd mirror daemon is running, a watcher will be added by the rbd // mirror daemon for mirrored images. - defaultWatchers++ + defaultWatchers += count } return len(watchers) > defaultWatchers, nil diff --git a/internal/util/csiconfig.go b/internal/util/csiconfig.go index 0982172d8679..6d3e7109ae0b 100644 --- a/internal/util/csiconfig.go +++ b/internal/util/csiconfig.go @@ -45,6 +45,7 @@ const ( "clusterID": "", "rbd": { "radosNamespace": "" + "mirrorDaemonCount": 1 }, "monitors": [ "", @@ -105,6 +106,22 @@ func GetRadosNamespace(pathToConfig, clusterID string) (string, error) { return cluster.RBD.RadosNamespace, nil } +// GetRBDMirrorDaemonCount returns the number of RBD mirror daemons configured +// for the given clusterID, defaulting to 1 when unset. +func GetRBDMirrorDaemonCount(pathToConfig, clusterID string) (int, error) { + cluster, err := readClusterInfo(pathToConfig, clusterID) + if err != nil { + return 0, err + } + + // if the count is unset (zero), default to 1, which is the most common setup. + if cluster.RBD.MirrorDaemonCount == 0 { + return 1, nil + } + + return cluster.RBD.MirrorDaemonCount, nil +} + // CephFSSubvolumeGroup returns the subvolumeGroup for CephFS volumes. If not set, it returns the default value "csi". 
func CephFSSubvolumeGroup(pathToConfig, clusterID string) (string, error) { cluster, err := readClusterInfo(pathToConfig, clusterID) diff --git a/internal/util/csiconfig_test.go b/internal/util/csiconfig_test.go index 8da099076896..d058050ba02d 100644 --- a/internal/util/csiconfig_test.go +++ b/internal/util/csiconfig_test.go @@ -17,11 +17,14 @@ limitations under the License. package util import ( + "bytes" "encoding/json" "os" "testing" cephcsi "github.com/ceph/ceph-csi/api/deploy/kubernetes" + + "github.com/stretchr/testify/require" ) var ( @@ -530,3 +533,83 @@ func TestGetCephFSMountOptions(t *testing.T) { }) } } + +func TestGetRBDMirrorDaemonCount(t *testing.T) { + t.Parallel() + tests := []struct { + name string + clusterID string + want int + }{ + { + name: "get rbd mirror daemon count for cluster-1", + clusterID: "cluster-1", + want: 2, + }, + { + name: "get rbd mirror daemon count for cluster-2", + clusterID: "cluster-2", + want: 4, + }, + { + name: "when rbd mirror daemon count is empty", + clusterID: "cluster-3", + want: 1, // default mirror daemon count + }, + } + + csiConfig := []cephcsi.ClusterInfo{ + { + ClusterID: "cluster-1", + Monitors: []string{"ip-1", "ip-2"}, + RBD: cephcsi.RBD{ + MirrorDaemonCount: 2, + }, + }, + { + ClusterID: "cluster-2", + Monitors: []string{"ip-3", "ip-4"}, + RBD: cephcsi.RBD{ + MirrorDaemonCount: 4, + }, + }, + { + ClusterID: "cluster-3", + Monitors: []string{"ip-5", "ip-6"}, + }, + } + csiConfigFileContent, err := json.Marshal(csiConfig) + if err != nil { + t.Errorf("failed to marshal csi config info %v", err) + } + tmpConfPath := t.TempDir() + "/ceph-csi.json" + err = os.WriteFile(tmpConfPath, csiConfigFileContent, 0o600) + if err != nil { + t.Errorf("failed to write %s file content: %v", CsiConfigFile, err) + } + for _, tt := range tests { + ts := tt + t.Run(ts.name, func(t *testing.T) { + t.Parallel() + got, err := GetRBDMirrorDaemonCount(tmpConfPath, ts.clusterID) + if err != nil { + 
t.Errorf("GetRBDMirrorDaemonCount() error = %v", err) + + return + } + if got != ts.want { + t.Errorf("GetRBDMirrorDaemonCount() = %v, want %v", got, ts.want) + } + }) + } + + // when mirrorDaemonCount is set as string + csiConfigFileContent = bytes.Replace(csiConfigFileContent, []byte(`"mirrorDaemonCount":2`), []byte(`"mirrorDaemonCount":"2"`), 1) + tmpCSIConfPath := t.TempDir() + "/ceph-csi.json" + err = os.WriteFile(tmpCSIConfPath, csiConfigFileContent, 0o600) + if err != nil { + t.Errorf("failed to write %s file content: %v", CsiConfigFile, err) + } + _, err = GetRBDMirrorDaemonCount(tmpCSIConfPath, "test") + require.Error(t, err) +} diff --git a/vendor/github.com/ceph/ceph-csi/api/deploy/kubernetes/csi-config-map.go b/vendor/github.com/ceph/ceph-csi/api/deploy/kubernetes/csi-config-map.go index 4d237b3c51f5..0c418e23d117 100644 --- a/vendor/github.com/ceph/ceph-csi/api/deploy/kubernetes/csi-config-map.go +++ b/vendor/github.com/ceph/ceph-csi/api/deploy/kubernetes/csi-config-map.go @@ -46,6 +46,8 @@ type RBD struct { NetNamespaceFilePath string `json:"netNamespaceFilePath"` // RadosNamespace is a rados namespace in the pool RadosNamespace string `json:"radosNamespace"` + // RBD mirror daemons running in the ceph cluster. + MirrorDaemonCount int `json:"mirrorDaemonCount"` } type NFS struct {