diff --git a/group_vars/all.yml.sample b/group_vars/all.yml.sample index a2651614a3..d596d53a72 100644 --- a/group_vars/all.yml.sample +++ b/group_vars/all.yml.sample @@ -541,8 +541,6 @@ dummy: # CONTAINER_IMAGE: "{{ ceph_docker_registry }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }}" # TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES: "{{ ceph_tcmalloc_max_total_thread_cache }}" # args: -# - --setuser=ceph -# - --setgroup=ceph # - --default-log-to-file=false # - --default-log-to-stderr=true # - --default-log-stderr-prefix="debug " diff --git a/group_vars/exporters.yml.sample b/group_vars/exporters.yml.sample new file mode 100644 index 0000000000..9bb126b901 --- /dev/null +++ b/group_vars/exporters.yml.sample @@ -0,0 +1,30 @@ +--- +# Variables here are applicable to all host groups NOT roles + +# This sample file generated by generate_group_vars_sample.sh + +# Dummy variable to avoid error because ansible does not recognize the +# file as a good configuration file when no variable in it. +dummy: + +########### +# GENERAL # +########### + +#ceph_exporter_addr: "0.0.0.0" +#ceph_exporter_port: 9926 +#ceph_exporter_stats_period: 5 # seconds + +########## +# DOCKER # +########## + +# If you want to add parameters, you should retain the existing ones and include the new ones. 
+#ceph_exporter_container_params: +# args: +# - -f +# - --sock-dir=/var/run/ceph +# - --addrs={{ ceph_exporter_addr }} +# - --port={{ ceph_exporter_port }} +# - --stats-period={{ ceph_exporter_stats_period }} + diff --git a/group_vars/mdss.yml.sample b/group_vars/mdss.yml.sample index 592421b5c1..edc556532d 100644 --- a/group_vars/mdss.yml.sample +++ b/group_vars/mdss.yml.sample @@ -26,7 +26,6 @@ dummy: # Resource limitation # For the whole list of limits you can apply see: docs.docker.com/engine/admin/resource_constraints # Default values are based from: https://access.redhat.com/documentation/en-us/red_hat_ceph_storage/2/html/red_hat_ceph_storage_hardware_guide/minimum_recommendations -# These options can be passed using the 'ceph_mds_docker_extra_env' variable. #ceph_mds_docker_memory_limit: "{{ ansible_facts['memtotal_mb'] }}m" #ceph_mds_docker_cpu_limit: 4 diff --git a/plugins/callback/installer_checkpoint.py b/plugins/callback/installer_checkpoint.py index 976ccefb6f..de9234d218 100644 --- a/plugins/callback/installer_checkpoint.py +++ b/plugins/callback/installer_checkpoint.py @@ -34,6 +34,7 @@ def v2_playbook_on_stats(self, stats): 'installer_phase_ceph_grafana', 'installer_phase_ceph_node_exporter', 'installer_phase_ceph_crash', + 'installer_phase_ceph_exporter', ] # Define the attributes of the installer phases @@ -90,6 +91,10 @@ def v2_playbook_on_stats(self, stats): 'title': 'Install Ceph Crash', 'playbook': 'roles/ceph-crash/tasks/main.yml' }, + 'installer_phase_ceph_exporter': { + 'title': 'Install Ceph Exporter', + 'playbook': 'roles/ceph-exporter/tasks/main.yml' + }, } # Find the longest phase title diff --git a/roles/ceph-exporter/defaults/main.yml b/roles/ceph-exporter/defaults/main.yml new file mode 100644 index 0000000000..ddd2d7622f --- /dev/null +++ b/roles/ceph-exporter/defaults/main.yml @@ -0,0 +1,21 @@ +--- +########### +# GENERAL # +########### + +ceph_exporter_addr: "0.0.0.0" +ceph_exporter_port: 9926 +ceph_exporter_stats_period: 5 # 
seconds + +########## +# DOCKER # +########## + +# If you want to add parameters, you should retain the existing ones and include the new ones. +ceph_exporter_container_params: + args: + - -f + - --sock-dir=/var/run/ceph + - --addrs={{ ceph_exporter_addr }} + - --port={{ ceph_exporter_port }} + - --stats-period={{ ceph_exporter_stats_period }} diff --git a/roles/ceph-exporter/meta/main.yml b/roles/ceph-exporter/meta/main.yml new file mode 100644 index 0000000000..61fc0ce21b --- /dev/null +++ b/roles/ceph-exporter/meta/main.yml @@ -0,0 +1,14 @@ +--- +galaxy_info: + company: Red Hat + author: Guillaume Abrioux + description: Deploy ceph-exporter + license: Apache + min_ansible_version: '2.7' + platforms: + - name: EL + versions: + - 'all' + galaxy_tags: + - system +dependencies: [] diff --git a/roles/ceph-exporter/tasks/main.yml b/roles/ceph-exporter/tasks/main.yml new file mode 100644 index 0000000000..e8933a7b3d --- /dev/null +++ b/roles/ceph-exporter/tasks/main.yml @@ -0,0 +1,12 @@ +--- +- name: Include_tasks systemd.yml + ansible.builtin.include_tasks: systemd.yml + when: containerized_deployment | bool + +- name: Start the ceph-exporter service + ansible.builtin.systemd: + name: "{{ 'ceph-exporter@' + ansible_facts['hostname'] if containerized_deployment | bool else 'ceph-exporter.service' }}" + state: started + enabled: true + masked: false + daemon_reload: true diff --git a/roles/ceph-exporter/tasks/systemd.yml b/roles/ceph-exporter/tasks/systemd.yml new file mode 100644 index 0000000000..4e4733f9d6 --- /dev/null +++ b/roles/ceph-exporter/tasks/systemd.yml @@ -0,0 +1,9 @@ +--- +- name: Generate systemd unit file for ceph-exporter container + ansible.builtin.template: + src: "{{ role_path }}/templates/ceph-exporter.service.j2" + dest: /etc/systemd/system/ceph-exporter@.service + owner: "root" + group: "root" + mode: "0644" + notify: Restart ceph exporter diff --git a/roles/ceph-exporter/templates/ceph-exporter.service.j2 
b/roles/ceph-exporter/templates/ceph-exporter.service.j2 new file mode 100644 index 0000000000..171bcd9d7c --- /dev/null +++ b/roles/ceph-exporter/templates/ceph-exporter.service.j2 @@ -0,0 +1,50 @@ +[Unit] +Description=Ceph exporter +{% if container_binary == 'docker' %} +After=docker.service network-online.target local-fs.target time-sync.target +Requires=docker.service +{% else %} +After=network-online.target local-fs.target time-sync.target +{% endif %} +Wants=network-online.target local-fs.target time-sync.target + +[Service] +{% if container_binary == 'podman' %} +ExecStartPre=-/usr/bin/rm -f /%t/%n-pid /%t/%n-cid +ExecStartPre=-/usr/bin/{{ container_binary }} rm --storage ceph-exporter-%i +{% endif %} +ExecStartPre=-/usr/bin/{{ container_binary }} rm -f ceph-exporter-%i +ExecStart=/usr/bin/{{ container_binary }} run --rm --name ceph-exporter-%i \ +{% if container_binary == 'podman' %} +-d --log-driver journald --conmon-pidfile /%t/%n-pid --cidfile /%t/%n-cid \ +{% endif %} +--pids-limit={{ 0 if container_binary == 'podman' else -1 }} \ +--security-opt label=disable \ +--net=host \ +{% for v in ceph_common_container_params['volumes'] + ceph_exporter_container_params['volumes'] | default([]) %} + -v {{ v }} \ +{% endfor %} +{% for k, v in (ceph_common_container_params['envs'] | combine(ceph_exporter_container_params['envs'] | default({}))).items() %} + -e {{ k }}={{ v }} \ +{% endfor %} +--entrypoint=/usr/bin/ceph-exporter {{ ceph_docker_registry }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }} \ + {{ (ceph_common_container_params['args'] + ceph_exporter_container_params['args'] | default([])) | join(' ') }} +{% if container_binary == 'podman' %} +ExecStop=-/usr/bin/sh -c "/usr/bin/{{ container_binary }} rm -f `cat /%t/%n-cid`" +{% else %} +ExecStop=-/usr/bin/{{ container_binary }} stop ceph-exporter-%i +{% endif %} +StartLimitInterval=10min +StartLimitBurst=30 +{% if container_binary == 'podman' %} +Type=forking +PIDFile=/%t/%n-pid +{% endif %} 
+KillMode=none +Restart=always +RestartSec=10s +TimeoutStartSec=120 +TimeoutStopSec=10 + +[Install] +WantedBy=ceph.target diff --git a/roles/ceph-handler/handlers/main.yml b/roles/ceph-handler/handlers/main.yml index d634b9dd87..9de73241a5 100644 --- a/roles/ceph-handler/handlers/main.yml +++ b/roles/ceph-handler/handlers/main.yml @@ -65,6 +65,17 @@ or inventory_hostname in groups.get(rgw_group_name, []) or inventory_hostname in groups.get(rbdmirror_group_name, []) + - name: Ceph exporter handler + ansible.builtin.include_tasks: handler_exporter.yml + listen: "Restart ceph exporter" + when: + - inventory_hostname in groups.get(mon_group_name, []) + or inventory_hostname in groups.get(mgr_group_name, []) + or inventory_hostname in groups.get(osd_group_name, []) + or inventory_hostname in groups.get(mds_group_name, []) + or inventory_hostname in groups.get(rgw_group_name, []) + or inventory_hostname in groups.get(rbdmirror_group_name, []) + - name: Remove tempdir for scripts ansible.builtin.file: path: "{{ tmpdirpath.path }}" @@ -80,6 +91,7 @@ register: tmpdirpath when: - tmpdirpath.path is defined + - not _exporter_handler_called | default(false) | bool - not _crash_handler_called | default(false) | bool - not _mds_handler_called | default(false) | bool - not _mgr_handler_called | default(false) | bool diff --git a/roles/ceph-handler/tasks/check_running_containers.yml b/roles/ceph-handler/tasks/check_running_containers.yml index 82f7c5c135..551a15c83b 100644 --- a/roles/ceph-handler/tasks/check_running_containers.yml +++ b/roles/ceph-handler/tasks/check_running_containers.yml @@ -68,3 +68,17 @@ or inventory_hostname in groups.get(mds_group_name, []) or inventory_hostname in groups.get(rgw_group_name, []) or inventory_hostname in groups.get(rbdmirror_group_name, []) + +- name: Check for a ceph-exporter container + ansible.builtin.command: "{{ container_binary }} ps -q --filter='name=ceph-exporter-{{ ansible_facts['hostname'] }}'" + register: 
ceph_exporter_container_stat + changed_when: false + failed_when: false + check_mode: false + when: + - inventory_hostname in groups.get(mon_group_name, []) + or inventory_hostname in groups.get(mgr_group_name, []) + or inventory_hostname in groups.get(osd_group_name, []) + or inventory_hostname in groups.get(mds_group_name, []) + or inventory_hostname in groups.get(rgw_group_name, []) + or inventory_hostname in groups.get(rbdmirror_group_name, []) diff --git a/roles/ceph-handler/tasks/check_socket_non_container.yml b/roles/ceph-handler/tasks/check_socket_non_container.yml index 37e1b0f959..96c492ffcc 100644 --- a/roles/ceph-handler/tasks/check_socket_non_container.yml +++ b/roles/ceph-handler/tasks/check_socket_non_container.yml @@ -218,3 +218,17 @@ or inventory_hostname in groups.get(mds_group_name, []) or inventory_hostname in groups.get(rgw_group_name, []) or inventory_hostname in groups.get(rbdmirror_group_name, []) + +- name: Check for a ceph-exporter process + ansible.builtin.command: pgrep ceph-exporter + changed_when: false + failed_when: false + check_mode: false + register: exporter_process + when: + - inventory_hostname in groups.get(mon_group_name, []) + or inventory_hostname in groups.get(mgr_group_name, []) + or inventory_hostname in groups.get(osd_group_name, []) + or inventory_hostname in groups.get(mds_group_name, []) + or inventory_hostname in groups.get(rgw_group_name, []) + or inventory_hostname in groups.get(rbdmirror_group_name, []) diff --git a/roles/ceph-handler/tasks/handler_exporter.yml b/roles/ceph-handler/tasks/handler_exporter.yml new file mode 100644 index 0000000000..d846d69c49 --- /dev/null +++ b/roles/ceph-handler/tasks/handler_exporter.yml @@ -0,0 +1,18 @@ +--- +- name: Set _exporter_handler_called before restart + ansible.builtin.set_fact: + _exporter_handler_called: true + +- name: Restart the ceph-exporter service # noqa: ignore-errors + ansible.builtin.systemd: + name: ceph-exporter@{{ ansible_facts['hostname'] }} + state: 
restarted + enabled: true + masked: false + daemon_reload: true + ignore_errors: true + when: hostvars[inventory_hostname]['_exporter_handler_called'] | default(False) | bool + +- name: Set _exporter_handler_called after restart + ansible.builtin.set_fact: + _exporter_handler_called: false diff --git a/roles/ceph-handler/tasks/main.yml b/roles/ceph-handler/tasks/main.yml index 4ea3bef6d6..c963b0115f 100644 --- a/roles/ceph-handler/tasks/main.yml +++ b/roles/ceph-handler/tasks/main.yml @@ -48,3 +48,14 @@ or inventory_hostname in groups.get(mds_group_name, []) or inventory_hostname in groups.get(rgw_group_name, []) or inventory_hostname in groups.get(rbdmirror_group_name, []) + +- name: Set_fact handler_exporter_status + ansible.builtin.set_fact: + handler_exporter_status: "{{ exporter_process.get('rc') == 0 if not containerized_deployment | bool else (ceph_exporter_container_stat.get('rc') == 0 and ceph_exporter_container_stat.get('stdout_lines', []) | length != 0) }}" + when: + - inventory_hostname in groups.get(mon_group_name, []) + or inventory_hostname in groups.get(mgr_group_name, []) + or inventory_hostname in groups.get(osd_group_name, []) + or inventory_hostname in groups.get(mds_group_name, []) + or inventory_hostname in groups.get(rgw_group_name, []) + or inventory_hostname in groups.get(rbdmirror_group_name, []) diff --git a/roles/ceph-mds/defaults/main.yml b/roles/ceph-mds/defaults/main.yml index cd1342f85c..be435e3e80 100644 --- a/roles/ceph-mds/defaults/main.yml +++ b/roles/ceph-mds/defaults/main.yml @@ -18,7 +18,6 @@ copy_admin_key: false # Resource limitation # For the whole list of limits you can apply see: docs.docker.com/engine/admin/resource_constraints # Default values are based from: https://access.redhat.com/documentation/en-us/red_hat_ceph_storage/2/html/red_hat_ceph_storage_hardware_guide/minimum_recommendations -# These options can be passed using the 'ceph_mds_docker_extra_env' variable. 
ceph_mds_docker_memory_limit: "{{ ansible_facts['memtotal_mb'] }}m" ceph_mds_docker_cpu_limit: 4 diff --git a/site-container.yml.sample b/site-container.yml.sample index 3935f9b20d..7031621c10 100644 --- a/site-container.yml.sample +++ b/site-container.yml.sample @@ -468,6 +468,46 @@ status: "Complete" end: "{{ lookup('pipe', 'date +%Y%m%d%H%M%SZ') }}" +- hosts: + - mons + - osds + - mdss + - rgws + - rbdmirrors + - mgrs + + gather_facts: false + become: True + any_errors_fatal: true + pre_tasks: + - name: set ceph exporter install 'In Progress' + run_once: true + set_stats: + data: + installer_phase_ceph_exporter: + status: "In Progress" + start: "{{ lookup('pipe', 'date +%Y%m%d%H%M%SZ') }}" + + tasks: + - import_role: + name: ceph-defaults + - import_role: + name: ceph-facts + tasks_from: container_binary.yml + - import_role: + name: ceph-handler + - import_role: + name: ceph-exporter + + post_tasks: + - name: set ceph exporter install 'Complete' + run_once: true + set_stats: + data: + installer_phase_ceph_exporter: + status: "Complete" + end: "{{ lookup('pipe', 'date +%Y%m%d%H%M%SZ') }}" + - hosts: mons[0] gather_facts: false become: True diff --git a/tests/conftest.py b/tests/conftest.py index 9a12727dc5..07e35e2654 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -171,6 +171,9 @@ def node(host, request): if request.node.get_closest_marker('ceph_crash') and sanitized_group_names in [['nfss'], ['clients'], ['monitoring']]: pytest.skip('Not a valid test for nfs or client nodes') + if request.node.get_closest_marker('ceph_exporter') and sanitized_group_names in [['nfss'], ['clients'], ['monitoring']]: + pytest.skip('Not a valid test for nfs, client or monitoring nodes') + if request.node.get_closest_marker("no_docker") and docker: pytest.skip( "Not a valid test for containerized deployments or atomic hosts") diff --git a/tests/functional/tests/test_install.py b/tests/functional/tests/test_install.py index 69f38c41d8..38d87c2fd5 100644 --- 
a/tests/functional/tests/test_install.py +++ b/tests/functional/tests/test_install.py @@ -45,3 +45,12 @@ def test_ceph_crash_service_enabled_and_running_container(self, node, host): s = host.service("ceph-crash@{hostname}".format(hostname=node["vars"]["inventory_hostname"])) assert s.is_enabled assert s.is_running + + +class TestCephExporter(object): + @pytest.mark.docker + @pytest.mark.ceph_exporter + def test_ceph_exporter_service_enabled_and_running_container(self, node, host): + s = host.service("ceph-exporter@{hostname}".format(hostname=node["vars"]["inventory_hostname"])) + assert s.is_enabled + assert s.is_running diff --git a/tests/pytest.ini b/tests/pytest.ini index 61620b9ebd..d4c15634be 100644 --- a/tests/pytest.ini +++ b/tests/pytest.ini @@ -2,6 +2,7 @@ # dir really is. [pytest] markers = + ceph_exporter: environment with ceph exporter enabled ceph_crash: environment with ceph crash enabled dashboard: environment with dashboard enabled no_docker: environment without containers