Skip to content

Commit

Permalink
ceph-handler: use haproxy maintenance for rgw restarts
Browse files Browse the repository at this point in the history
RGW currently restarts without waiting for existing connections to
close. By adjusting the HAProxy weight before the restart, we can
ensure that no active connections are disrupted during the restart
process.

Signed-off-by: Seena Fallah <[email protected]>
  • Loading branch information
clwluvw committed Jun 20, 2024
1 parent 59198f5 commit 4af2552
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 5 deletions.
1 change: 1 addition & 0 deletions group_vars/all.yml.sample
Original file line number Diff line number Diff line change
Expand Up @@ -420,6 +420,7 @@ dummy:
# RGW handler checks
#handler_health_rgw_check_retries: 5
#handler_health_rgw_check_delay: 10
# When true, the RGW restart script sets the HAProxy server weight of the
# instance to 0 and waits (up to 60 seconds) for its connections to drain
# before restarting it, then raises the weight again after the restart checks.
# The script talks to HAProxy with socat over haproxy_socket_path and expects
# the HAProxy server name to match the RGW instance name.
#handler_rgw_use_haproxy_maintenance: false

# NFS handler checks
#handler_health_nfs_check_retries: 5
Expand Down
1 change: 1 addition & 0 deletions roles/ceph-defaults/defaults/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -412,6 +412,7 @@ handler_health_mds_check_delay: 10
# RGW handler checks
handler_health_rgw_check_retries: 5
handler_health_rgw_check_delay: 10
# When true, the RGW restart script sets the HAProxy server weight of the
# instance to 0 and waits (up to 60 seconds) for its connections to drain
# before restarting it, then raises the weight again after the restart checks.
# The script talks to HAProxy with socat over haproxy_socket_path and expects
# the HAProxy server name to match the RGW instance name.
handler_rgw_use_haproxy_maintenance: false

# NFS handler checks
handler_health_nfs_check_retries: 5
Expand Down
30 changes: 25 additions & 5 deletions roles/ceph-handler/templates/restart_rgw_daemon.sh.j2
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ else
RGW_PROTOCOL=http
fi
# Per-instance settings rendered from rgw_instances. Every array below is
# produced by iterating over the same list, so index i designates the same
# RGW instance in all of them.
INSTANCES_NAME=({% for i in rgw_instances %}{{ i.instance_name }} {% endfor %})
# HAProxy backend that holds each instance; falls back to 'rgw-backend' when
# the instance does not define haproxy_backend.
HAPROXY_BACKEND=({% for i in rgw_instances %}{{ i.haproxy_backend | default('rgw-backend') }} {% endfor %})
# Address and frontend port of each instance.
RGW_IPS=({% for i in rgw_instances %}{{ i.radosgw_address }} {% endfor %})
RGW_PORTS=({% for i in rgw_instances %}{{ i.radosgw_frontend_port }} {% endfor %})
# Zone name, rendered from rgw_zone.
RGW_ZONE="{{ rgw_zone }}"
Expand Down Expand Up @@ -78,19 +79,38 @@ check_rest() {
}
{% if handler_rgw_use_haproxy_maintenance %}
# Send a single command to the HAProxy runtime API through its admin socket.
# Arguments: $1 - command line to send
# Outputs:   HAProxy's answer on stdout
haproxy_cmd() {
  printf '%s\n' "$1" | socat stdio "{{ haproxy_socket_path }}"
}

{% endif %}
# Restart the RGW instances of this host one at a time.
# For each instance: skip it when its systemd unit does not exist yet,
# optionally drain it in HAProxy, restart it, wait for its socket and REST
# endpoint, then optionally put it back in rotation.
for ((i=0; i<${RGW_NUMS}; i++)); do
  unit="ceph-radosgw@rgw.${RGW_ZONE}.${HOST_NAME}.${INSTANCES_NAME[i]}"
  # Check if systemd unit exists
  # This is needed for new instances as the restart might trigger before the deployment
  # -F: the unit name contains dots, which must not act as regex wildcards.
  if ! systemctl list-units --full --all | grep -qF -- "${unit}"; then
    echo "Systemd unit ${unit} does not exist."
    continue
  fi
{% if handler_rgw_use_haproxy_maintenance %}
  server="${HAPROXY_BACKEND[i]}/${INSTANCES_NAME[i]}"
  # set server weight to 0 on haproxy so it stops receiving new connections
  haproxy_cmd "set weight ${server} 0"
  # wait for max 60 seconds for the connections to drop
  retries=60
  while (( retries > 0 )); do
    # Column 7 of "show servers conn" is read as the number of connections
    # currently in use. Only the exact server line is considered, and only
    # its first occurrence, so the result is a single value or empty.
    conns="$(haproxy_cmd "show servers conn ${HAPROXY_BACKEND[i]}" \
      | awk -v srv="${server}" '$1 == srv && !seen++ { print $7 }')" || conns=""
    # An empty or non-numeric answer (server unknown, socket unreachable) is
    # treated as "not drained yet" instead of raising a test error.
    if [[ "${conns}" =~ ^0+$ ]]; then
      break
    fi
    sleep 1
    retries=$((retries - 1))
  done
{% endif %}
  # Restart the daemon
  systemctl restart "${unit}"
  # Check socket files
  check_socket "${i}"
  # Check rest
  check_rest "${i}"
{% if handler_rgw_use_haproxy_maintenance %}
  # Restore the weight configured in haproxy: a relative value (100%) brings
  # the server back to its initial weight, whereas an absolute 100 would
  # overwrite whatever weight the backend was configured with.
  haproxy_cmd "set weight ${server} 100%"
{% endif %}
done

0 comments on commit 4af2552

Please sign in to comment.