ceph_ec_profile: add support for defining the failure domain
For large EC deployments, for example EC 8+3:

* we need 12 racks
* we have 10 hosts per rack, with 60 OSDs per host
* we definitely need a `rack` failure domain

Setting `crush-failure-domain=rack` in the profile command creates a CRUSH rule that ensures no two chunks are stored in the same rack.
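For illustration, a minimal Python sketch of the CLI command such a profile boils down to (the profile name `ec83` is made up; `ceph osd erasure-code-profile set` is the underlying Ceph command):

# Minimal sketch: the Ceph CLI command an EC 8+3 profile with a rack
# failure domain translates to. The profile name 'ec83' is an assumption.
args = ['ceph', 'osd', 'erasure-code-profile', 'set', 'ec83',
        'k=8', 'm=3', 'crush-failure-domain=rack']
print(' '.join(args))
# ceph osd erasure-code-profile set ec83 k=8 m=3 crush-failure-domain=rack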

Signed-off-by: Konstantin Shalygin <[email protected]>
k0ste committed Nov 29, 2023
1 parent 490ca79 commit e026729
Showing 3 changed files with 24 additions and 9 deletions.
13 changes: 12 additions & 1 deletion library/ceph_ec_profile.py
@@ -76,6 +76,10 @@
- Compute coding chunks for each object and store them on different
OSDs.
required: true
crush_failure_domain:
description:
- The failure domain for data durability (host/rack)
required: false
crush_device_class:
description:
- Restrict placement to devices of a specific class (hdd/ssd)
@@ -116,14 +120,16 @@ def get_profile(module, name, cluster='ceph', container_image=None):
return cmd


def create_profile(module, name, k, m, stripe_unit, crush_device_class, cluster='ceph', force=False, container_image=None): # noqa: E501
def create_profile(module, name, k, m, stripe_unit, crush_failure_domain, crush_device_class, cluster='ceph', force=False, container_image=None): # noqa: E501
'''
Create a profile
'''

args = ['set', name, 'k={}'.format(k), 'm={}'.format(m)]
if stripe_unit:
args.append('stripe_unit={}'.format(stripe_unit))
if crush_failure_domain:
args.append('crush-failure-domain={}'.format(crush_failure_domain))
if crush_device_class:
args.append('crush-device-class={}'.format(crush_device_class))
if force:
@@ -161,6 +167,7 @@ def run_module():
stripe_unit=dict(type='str', required=False),
k=dict(type='str', required=False),
m=dict(type='str', required=False),
crush_failure_domain=dict(type='str', required=False, default=''),
crush_device_class=dict(type='str', required=False, default=''),
)

@@ -177,6 +184,7 @@ def run_module():
stripe_unit = module.params.get('stripe_unit')
k = module.params.get('k')
m = module.params.get('m')
crush_failure_domain = module.params.get('crush_failure_domain')
crush_device_class = module.params.get('crush_device_class')

if module.check_mode:
@@ -205,13 +213,15 @@ def run_module():
if current_profile['k'] != k or \
current_profile['m'] != m or \
current_profile.get('stripe_unit', stripe_unit) != stripe_unit or \
current_profile.get('crush-failure-domain', crush_failure_domain) != crush_failure_domain or \
current_profile.get('crush-device-class', crush_device_class) != crush_device_class: # noqa: E501
rc, cmd, out, err = exec_command(module,
create_profile(module,
name,
k,
m,
stripe_unit,
crush_failure_domain, # noqa: E501
crush_device_class, # noqa: E501
cluster,
force=True, container_image=container_image)) # noqa: E501
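A note on the comparison idiom in this hunk, as a runnable sketch with made-up profile values: passing the desired value as the `.get()` fallback makes a key the live profile does not report compare as equal, so only a genuine mismatch triggers the re-create with `force=True`.

# Sketch of the .get(key, desired) != desired idiom; values are made up.
current_profile = {'k': '8', 'm': '3'}  # live profile, no failure domain reported
desired = ''                            # module default when the param is unset
# A missing key falls back to the desired value itself, so no spurious update:
assert current_profile.get('crush-failure-domain', desired) == desired
# A genuine mismatch is still detected:
current_profile['crush-failure-domain'] = 'host'
assert current_profile.get('crush-failure-domain', 'rack') != 'rack'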
@@ -223,6 +233,7 @@ def run_module():
k,
m,
stripe_unit, # noqa: E501
crush_failure_domain, # noqa: E501
crush_device_class, # noqa: E501
cluster,
container_image=container_image)) # noqa: E501
1 change: 1 addition & 0 deletions roles/ceph-rgw/tasks/rgw_create_pools.yml
@@ -5,6 +5,7 @@
cluster: "{{ cluster }}"
k: "{{ item.value.ec_k }}"
m: "{{ item.value.ec_m }}"
crush_failure_domain: "{{ item.value.crush_failure_domain | default(omit) }}"
crush_device_class: "{{ item.value.ec_crush_device_class | default(omit) }}"
delegate_to: "{{ groups[mon_group_name][0] }}"
loop: "{{ rgw_create_pools | dict2items }}"
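For reference, a sketch of what a `rgw_create_pools` entry feeding the task above could look like, written here as a Python dict for illustration; the pool name and values are made up:

# Hypothetical rgw_create_pools entry; keys mirror what the task above
# reads from item.value (ec_k, ec_m, crush_failure_domain, ec_crush_device_class).
rgw_create_pools = {
    'default.rgw.buckets.data': {
        'ec_k': 8,
        'ec_m': 3,
        'crush_failure_domain': 'rack',  # the key wired in by this change
        'ec_crush_device_class': 'hdd',
    },
}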
19 changes: 11 additions & 8 deletions tests/library/test_ceph_ec_profile.py
@@ -28,14 +28,14 @@ def test_get_profile(self):

assert ceph_ec_profile.get_profile(self.fake_module, self.fake_name) == expected_cmd

@pytest.mark.parametrize("stripe_unit,crush_device_class,force", [(False, None, False),
(32, None, True),
(False, None, True),
(32, None, False),
(False, 'hdd', False),
(32, 'ssd', True),
(False, 'nvme', True),
(32, 'hdd', False)])
@pytest.mark.parametrize("stripe_unit,crush_failure_domain,crush_device_class,force", [(False, None, None, False),
(32, None, None, True),
(False, None, None, True),
(32, None, None, False),
(False, 'host', 'hdd', False),
(32, 'host', 'ssd', True),
(False, 'host', 'nvme', True),
(32, 'host', 'hdd', False)])
def test_create_profile(self, stripe_unit, crush_failure_domain, crush_device_class, force):
expected_cmd = [
self.fake_binary,
Expand All @@ -48,6 +48,8 @@ def test_create_profile(self, stripe_unit, crush_device_class, force):
]
if stripe_unit:
expected_cmd.append('stripe_unit={}'.format(stripe_unit))
if crush_failure_domain:
expected_cmd.append('crush-failure-domain={}'.format(crush_failure_domain))
if crush_device_class:
expected_cmd.append('crush-device-class={}'.format(crush_device_class))
if force:
@@ -58,6 +60,7 @@ def test_create_profile(self, stripe_unit, crush_device_class, force):
self.fake_k,
self.fake_m,
stripe_unit,
crush_failure_domain,
crush_device_class,
self.fake_cluster,
force) == expected_cmd
