Skip to content

Commit

Permalink
Update pgbouncer pause script
Browse files Browse the repository at this point in the history
  • Loading branch information
vitabaks committed Nov 7, 2023
1 parent 6ea2140 commit 391e291
Show file tree
Hide file tree
Showing 5 changed files with 19 additions and 45 deletions.
2 changes: 1 addition & 1 deletion molecule/pg_upgrade/converge.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
consul_node_role: server # if dcs_type: "consul"
consul_bootstrap_expect: true # if dcs_type: "consul"
postgresql_version: "14" # redefine the version to install for the upgrade test
pgbouncer_processes: 1 # Test multiple pgbouncer processes (so_reuseport)
pgbouncer_processes: 4 # Test multiple pgbouncer processes (so_reuseport)
cacheable: true

- name: Set variables for custom PostgreSQL data and WAL directory test
Expand Down
4 changes: 2 additions & 2 deletions roles/upgrade/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -129,8 +129,8 @@ Please see the variable file vars/[upgrade.yml](../../vars/upgrade.yml)
- **Check if PostgreSQL tablespaces exist**
- Print tablespace location (if exists)
- Note: If tablespaces are present they will be upgraded (step 5) on replicas using rsync
- **Test PgBouncer access via localhost**
- test access via 'localhost' to be able to perform 'PAUSE' command
- **Test PgBouncer access via unix socket**
- test access via the unix socket to be able to perform the 'PAUSE' command
- **Make sure that the cluster ip address (VIP) is running**
- Notes: if 'cluster_vip' is defined

Expand Down
36 changes: 14 additions & 22 deletions roles/upgrade/tasks/pgbouncer_pause.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,32 +36,24 @@
and state <> 'idle'
and query_start < clock_timestamp() - interval '{{ pg_slow_active_query_treshold_to_terminate }} ms'
{{ "and backend_type = 'client backend'" if pg_old_version is version('10', '>=') else '' }}
# Depending on the number of PgBouncer processes, it either uses a local connection or the socket paths for each PgBouncer process.
pgbouncer_pause_command: >-
{% if pgbouncer_processes | default(1) | int == 1 %}
bash -c 'PGPASSWORD={{ patroni_superuser_password }} psql -h localhost -p {{ pgbouncer_listen_port }} -U {{ patroni_superuser_username }} -d pgbouncer -tAXc "PAUSE"'
{% else %}
{% set unix_socket_dir = [] %}
{% for i in range(0, pgbouncer_processes | default(1) | int) %}
{% if i == 0 %}
{{ unix_socket_dir.append('/var/run/pgbouncer') }}
{% else %}
{{ unix_socket_dir.append('/var/run/pgbouncer-' + i | string) }}
{% endif %}
{% endfor %}
echo -e "{{ unix_socket_dir | join('\n') }}" | xargs -I {} -P {{ pgbouncer_processes | default(1) | int }} bash -c 'PGPASSWORD={{ patroni_superuser_password }} psql -h {} -p {{ pgbouncer_listen_port }} -U {{ patroni_superuser_username }} -d pgbouncer -tAXc "PAUSE"'
{% endif %}
pgb_unix_socket_dirs: >-
{% set unix_socket_dir = ['/var/run/pgbouncer'] %}
{%- for idx in range(1, pgbouncer_processes | default(1) | int) -%}
{{ unix_socket_dir.append('/var/run/pgbouncer-' + (idx + 1) | string) }}
{%- endfor -%}
{{ unix_socket_dir | join(' ') }}
ansible.builtin.shell: |
set -o pipefail;
pg_servers="{{ (groups['primary'] + groups['secondary']) | join('\n') }}"
pg_count=$(echo -e "$pg_servers" | wc -l)
pg_servers_count="{{ groups['primary'] | default([]) | length + groups['secondary'] | default([]) | length }}"
pg_slow_active_count_query="{{ pg_slow_active_count_query }}"
pg_slow_active_terminate_query="{{ pg_slow_active_terminate_query }}"
# it is assumed that pgbouncer is installed on database servers
pgb_servers="$pg_servers"
pgb_count="$pg_count"
pgb_pause_command="{{ pgbouncer_pause_command }}"
pgb_servers_count="$pg_servers_count"
pgb_count="{{ (groups['primary'] | default([]) | length + groups['secondary'] | default([]) | length) * (pgbouncer_processes | default(1) | int) }}"
pgb_pause_command="printf '%s\n' {{ pgb_unix_socket_dirs }} | xargs -I {} -P {{ pgbouncer_processes | default(1) | int }} -n 1 psql -h {} -p {{ pgbouncer_listen_port }} -U {{ patroni_superuser_username }} -d pgbouncer -tAXc 'PAUSE'"
pgb_resume_command='kill -SIGUSR2 $(pidof pgbouncer)'
start_time=$(date +%s)
Expand All @@ -71,7 +63,7 @@
pgb_paused_count=0
# wait for the active queries to complete on pg_servers
IFS=$'\n' pg_slow_active_counts=($(echo -e "$pg_servers" | xargs -I {} -P "$pg_count" -n 1 ssh -o StrictHostKeyChecking=no {} "psql -p {{ postgresql_port }} -U {{ patroni_superuser_username }} -d postgres -tAXc \"$pg_slow_active_count_query\""))
IFS=$'\n' pg_slow_active_counts=($(echo -e "$pg_servers" | xargs -I {} -P "$pg_servers_count" -n 1 ssh -o StrictHostKeyChecking=no {} "psql -p {{ postgresql_port }} -U {{ patroni_superuser_username }} -d postgres -tAXc \"$pg_slow_active_count_query\""))
# sum up all the values in the array
total_pg_slow_active_count=0
Expand All @@ -83,7 +75,7 @@
if [[ "$total_pg_slow_active_count" == 0 ]]; then
# pause pgbouncer on all pgb_servers. We send via ssh to all pgbouncers in parallel and collect results from all (maximum wait time 2 seconds)
IFS=$'\n' pause_results=($(echo -e "$pgb_servers" | xargs -I {} -P "$pgb_count" -n 1 ssh -o StrictHostKeyChecking=no {} "timeout 2 $pgb_pause_command 2>&1 || true"))
IFS=$'\n' pause_results=($(echo -e "$pgb_servers" | xargs -I {} -P "$pgb_servers_count" -n 1 ssh -o StrictHostKeyChecking=no {} "timeout 2 $pgb_pause_command 2>&1 || true"))
echo "${pause_results[*]}"
# analyze the pause_results array to count the number of paused pgbouncers
pgb_paused_count=$(echo "${pause_results[*]}" | grep -o -e "PAUSE" -e "already suspended/paused" | wc -l)
Expand All @@ -95,14 +87,14 @@
break # pause is performed on all pgb_servers, exit from the loop
elif [[ "$pgb_paused_count" -gt 0 && "$pgb_paused_count" -ne "$pgb_count" ]]; then
# pause is not performed on all pgb_servers, perform resume (we do not use timeout because we must resume all pgbouncers)
IFS=$'\n' resume_results=($(echo -e "$pgb_servers" | xargs -I {} -P "$pgb_count" -n 1 ssh -o StrictHostKeyChecking=no {} "$pgb_resume_command 2>&1 || true"))
IFS=$'\n' resume_results=($(echo -e "$pgb_servers" | xargs -I {} -P "$pgb_servers_count" -n 1 ssh -o StrictHostKeyChecking=no {} "$pgb_resume_command 2>&1 || true"))
echo "${resume_results[*]}"
fi
# after pgbouncer_pool_pause_terminate_after seconds of waiting (presumably 30 by default), terminate active sessions on pg_servers and try pausing again
if (( current_time - start_time >= {{ pgbouncer_pool_pause_terminate_after }} )); then
echo "$(date): terminate active queries"
echo -e "$pg_servers" | xargs -I {} -P "$pg_count" -n 1 ssh -o StrictHostKeyChecking=no {} "psql -p {{ postgresql_port }} -U {{ patroni_superuser_username }} -d postgres -tAXc \"$pg_slow_active_terminate_query\""
echo -e "$pg_servers" | xargs -I {} -P "$pg_servers_count" -n 1 ssh -o StrictHostKeyChecking=no {} "psql -p {{ postgresql_port }} -U {{ patroni_superuser_username }} -d postgres -tAXc \"$pg_slow_active_terminate_query\""
fi
# if it was not possible to pause for 60 seconds, exit with an error
Expand Down
20 changes: 1 addition & 19 deletions roles/upgrade/tasks/pre_checks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -334,22 +334,7 @@
- tablespace_location.stdout_lines | length > 0

# PgBouncer (if 'pgbouncer_pool_pause' is 'true')
# test access via localhost to be able to perform 'PAUSE' command
- name: '[Pre-Check] Test PgBouncer access via localhost'
ansible.builtin.command: >-
psql -h localhost
-p {{ pgbouncer_listen_port }}
-U {{ patroni_superuser_username }}
-d pgbouncer
-tAXc "SHOW POOLS"
changed_when: false
environment:
PGPASSWORD: "{{ patroni_superuser_password }}"
when:
- pgbouncer_install | bool
- pgbouncer_pool_pause | bool
- pgbouncer_processes | default(1) | int == 1

# test access via the unix socket to be able to perform the 'PAUSE' command
- name: '[Pre-Check] Test PgBouncer access via unix socket'
ansible.builtin.command: >-
psql -h /var/run/pgbouncer{{ '-%d' % (idx + 1) if idx > 0 else '' }}
Expand All @@ -362,12 +347,9 @@
index_var: idx
label: "{{ 'pgbouncer' if idx == 0 else 'pgbouncer-%d' % (idx + 1) }}"
changed_when: false
environment:
PGPASSWORD: "{{ patroni_superuser_password }}"
when:
- pgbouncer_install | bool
- pgbouncer_pool_pause | bool
- pgbouncer_processes | default(1) | int > 1

# Check the VIP address
- name: Make sure that the cluster ip address (VIP) "{{ cluster_vip }}" is running
Expand Down
2 changes: 1 addition & 1 deletion vars/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -289,8 +289,8 @@ postgresql_pg_ident: []
# the password file (~/.pgpass)
postgresql_pgpass:
- "localhost:{{ postgresql_port }}:*:{{ patroni_superuser_username }}:{{ patroni_superuser_password }}"
- "localhost:{{ pgbouncer_listen_port }}:*:{{ patroni_superuser_username }}:{{ patroni_superuser_password }}"
- "{{ inventory_hostname }}:{{ postgresql_port }}:*:{{ patroni_superuser_username }}:{{ patroni_superuser_password }}"
- "*:{{ pgbouncer_listen_port }}:*:{{ patroni_superuser_username }}:{{ patroni_superuser_password }}"
# - hostname:port:database:username:password


Expand Down

0 comments on commit 391e291

Please sign in to comment.