From 391e2916b19fe81b7fac7fc02e0707681f711867 Mon Sep 17 00:00:00 2001
From: Vitaliy Kukharik
Date: Tue, 7 Nov 2023 10:55:26 +0000
Subject: [PATCH] Update pgbouncer pause script

Pause every PgBouncer process through its unix socket directory
(/var/run/pgbouncer, /var/run/pgbouncer-2, ...) no matter how many
processes are configured, instead of switching between a localhost
connection and socket paths.

- pgbouncer_pause.yml: the socket directories are rendered once into
  'pgb_unix_socket_dirs'. 'pgb_servers_count' drives the ssh fan-out
  (one ssh per server), while 'pgb_count' is servers x processes and is
  what the number of successful PAUSE replies is compared against.
- The 2 second limit is applied by 'timeout' to each psql call inside
  xargs. Prefixing the whole command ("timeout 2 printf ... | xargs ...")
  would only limit printf, so a PAUSE that blocks would never be aborted
  and the retry/resume/terminate loop would stall.
- pre_checks.yml: one unix socket access check replaces the separate
  localhost check; PGPASSWORD is no longer passed in the environment.
- vars/main.yml: the pgpass entry for the PgBouncer port uses the '*'
  wildcard as host instead of 'localhost'.
- molecule: the upgrade scenario runs with 4 PgBouncer processes.

---
 molecule/pg_upgrade/converge.yml        |  2 +-
 roles/upgrade/README.md                 |  4 +--
 roles/upgrade/tasks/pgbouncer_pause.yml | 36 ++++++++++---------------
 roles/upgrade/tasks/pre_checks.yml      | 20 +-------------
 vars/main.yml                           |  2 +-
 5 files changed, 19 insertions(+), 45 deletions(-)

diff --git a/molecule/pg_upgrade/converge.yml b/molecule/pg_upgrade/converge.yml
index 65184ebb2..3d20befe4 100644
--- a/molecule/pg_upgrade/converge.yml
+++ b/molecule/pg_upgrade/converge.yml
@@ -16,7 +16,7 @@
         consul_node_role: server  # if dcs_type: "consul"
         consul_bootstrap_expect: true  # if dcs_type: "consul"
         postgresql_version: "14"  # redefine the version to install for the upgrade test
-        pgbouncer_processes: 1  # Test multiple pgbouncer processes (so_reuseport)
+        pgbouncer_processes: 4  # Test multiple pgbouncer processes (so_reuseport)
         cacheable: true

     - name: Set variables for custom PostgreSQL data and WAL directory test
diff --git a/roles/upgrade/README.md b/roles/upgrade/README.md
index dc696ab2f..87776aac4 100644
--- a/roles/upgrade/README.md
+++ b/roles/upgrade/README.md
@@ -129,8 +129,8 @@ Please see the variable file vars/[upgrade.yml](../../vars/upgrade.yml)
 - **Check if PostgreSQL tablespaces exist**
   - Print tablespace location (if exists)
     - Note: If tablespaces are present they will be upgraded (step 5) on replicas using rsync
-- **Test PgBouncer access via localhost**
-  - test access via 'localhost' to be able to perform 'PAUSE' command
+- **Test PgBouncer access via unix socket**
+  - test access via unix socket to be able to perform 'PAUSE' command
 - **Make sure that the cluster ip address (VIP) is running**
   - Notes: if 'cluster_vip' is defined

diff --git a/roles/upgrade/tasks/pgbouncer_pause.yml b/roles/upgrade/tasks/pgbouncer_pause.yml
index b4222dd52..ed6359a6f 100644
--- a/roles/upgrade/tasks/pgbouncer_pause.yml
+++ b/roles/upgrade/tasks/pgbouncer_pause.yml
@@ -36,32 +36,24 @@
       and state <> 'idle'
       and query_start < clock_timestamp() - interval '{{ pg_slow_active_query_treshold_to_terminate }} ms'
       {{ "and backend_type = 'client backend'" if pg_old_version is version('10', '>=') else '' }}
-    # Depending on the number of PgBouncer processes, it either uses a local connection or the socket paths for each PgBouncer process.
-    pgbouncer_pause_command: >-
-      {% if pgbouncer_processes | default(1) | int == 1 %}
-        bash -c 'PGPASSWORD={{ patroni_superuser_password }} psql -h localhost -p {{ pgbouncer_listen_port }} -U {{ patroni_superuser_username }} -d pgbouncer -tAXc "PAUSE"'
-      {% else %}
-        {% set unix_socket_dir = [] %}
-        {% for i in range(0, pgbouncer_processes | default(1) | int) %}
-          {% if i == 0 %}
-            {{ unix_socket_dir.append('/var/run/pgbouncer') }}
-          {% else %}
-            {{ unix_socket_dir.append('/var/run/pgbouncer-' + i | string) }}
-          {% endif %}
-        {% endfor %}
-        echo -e "{{ unix_socket_dir | join('\n') }}" | xargs -I {} -P {{ pgbouncer_processes | default(1) | int }} bash -c 'PGPASSWORD={{ patroni_superuser_password }} psql -h {} -p {{ pgbouncer_listen_port }} -U {{ patroni_superuser_username }} -d pgbouncer -tAXc "PAUSE"'
-      {% endif %}
+    pgb_unix_socket_dirs: >-
+      {% set unix_socket_dir = ['/var/run/pgbouncer'] %}
+      {%- for idx in range(1, pgbouncer_processes | default(1) | int) -%}
+        {{ unix_socket_dir.append('/var/run/pgbouncer-' + (idx + 1) | string) }}
+      {%- endfor -%}
+      {{ unix_socket_dir | join(' ') }}
   ansible.builtin.shell: |
     set -o pipefail;

     pg_servers="{{ (groups['primary'] + groups['secondary']) | join('\n') }}"
-    pg_count=$(echo -e "$pg_servers" | wc -l)
+    pg_servers_count="{{ groups['primary'] | default([]) | length + groups['secondary'] | default([]) | length }}"
     pg_slow_active_count_query="{{ pg_slow_active_count_query }}"
     pg_slow_active_terminate_query="{{ pg_slow_active_terminate_query }}"
     # it is assumed that pgbouncer is installed on database servers
     pgb_servers="$pg_servers"
-    pgb_count="$pg_count"
-    pgb_pause_command="{{ pgbouncer_pause_command }}"
+    pgb_servers_count="$pg_servers_count"
+    pgb_count="{{ (groups['primary'] | default([]) | length + groups['secondary'] | default([]) | length) * (pgbouncer_processes | default(1) | int) }}"
+    pgb_pause_command="printf '%s\n' {{ pgb_unix_socket_dirs }} | xargs -I {} -P {{ pgbouncer_processes | default(1) | int }} -n 1 timeout 2 psql -h {} -p {{ pgbouncer_listen_port }} -U {{ patroni_superuser_username }} -d pgbouncer -tAXc 'PAUSE'"
     pgb_resume_command='kill -SIGUSR2 $(pidof pgbouncer)'

     start_time=$(date +%s)
@@ -71,7 +63,7 @@
       pgb_paused_count=0

       # wait for the active queries to complete on pg_servers
-      IFS=$'\n' pg_slow_active_counts=($(echo -e "$pg_servers" | xargs -I {} -P "$pg_count" -n 1 ssh -o StrictHostKeyChecking=no {} "psql -p {{ postgresql_port }} -U {{ patroni_superuser_username }} -d postgres -tAXc \"$pg_slow_active_count_query\""))
+      IFS=$'\n' pg_slow_active_counts=($(echo -e "$pg_servers" | xargs -I {} -P "$pg_servers_count" -n 1 ssh -o StrictHostKeyChecking=no {} "psql -p {{ postgresql_port }} -U {{ patroni_superuser_username }} -d postgres -tAXc \"$pg_slow_active_count_query\""))

       # sum up all the values in the array
       total_pg_slow_active_count=0
@@ -83,7 +75,7 @@

       if [[ "$total_pg_slow_active_count" == 0 ]]; then
         # pause pgbouncer on all pgb_servers. We send via ssh to all pgbouncers in parallel and collect results from all (maximum wait time 2 seconds)
-        IFS=$'\n' pause_results=($(echo -e "$pgb_servers" | xargs -I {} -P "$pgb_count" -n 1 ssh -o StrictHostKeyChecking=no {} "timeout 2 $pgb_pause_command 2>&1 || true"))
+        IFS=$'\n' pause_results=($(echo -e "$pgb_servers" | xargs -I {} -P "$pgb_servers_count" -n 1 ssh -o StrictHostKeyChecking=no {} "$pgb_pause_command 2>&1 || true"))
         echo "${pause_results[*]}"
         # analyze the pause_results array to count the number of paused pgbouncers
         pgb_paused_count=$(echo "${pause_results[*]}" | grep -o -e "PAUSE" -e "already suspended/paused" | wc -l)
@@ -95,14 +87,14 @@
         break # pause is performed on all pgb_servers, exit from the loop
       elif [[ "$pgb_paused_count" -gt 0 && "$pgb_paused_count" -ne "$pgb_count" ]]; then
         # pause is not performed on all pgb_servers, perform resume (we do not use timeout because we mast to resume all pgbouncers)
-        IFS=$'\n' resume_results=($(echo -e "$pgb_servers" | xargs -I {} -P "$pgb_count" -n 1 ssh -o StrictHostKeyChecking=no {} "$pgb_resume_command 2>&1 || true"))
+        IFS=$'\n' resume_results=($(echo -e "$pgb_servers" | xargs -I {} -P "$pgb_servers_count" -n 1 ssh -o StrictHostKeyChecking=no {} "$pgb_resume_command 2>&1 || true"))
         echo "${resume_results[*]}"
       fi

       # after 30 seconds of waiting, terminate active sessions on pg_servers and try pausing again
       if (( current_time - start_time >= {{ pgbouncer_pool_pause_terminate_after }} )); then
         echo "$(date): terminate active queries"
-        echo -e "$pg_servers" | xargs -I {} -P "$pg_count" -n 1 ssh -o StrictHostKeyChecking=no {} "psql -p {{ postgresql_port }} -U {{ patroni_superuser_username }} -d postgres -tAXc \"$pg_slow_active_terminate_query\""
+        echo -e "$pg_servers" | xargs -I {} -P "$pg_servers_count" -n 1 ssh -o StrictHostKeyChecking=no {} "psql -p {{ postgresql_port }} -U {{ patroni_superuser_username }} -d postgres -tAXc \"$pg_slow_active_terminate_query\""
       fi

       # if it was not possible to pause for 60 seconds, exit with an error
diff --git a/roles/upgrade/tasks/pre_checks.yml b/roles/upgrade/tasks/pre_checks.yml
index 2135dbcee..98ae9f801 100644
--- a/roles/upgrade/tasks/pre_checks.yml
+++ b/roles/upgrade/tasks/pre_checks.yml
@@ -334,22 +334,7 @@
     - tablespace_location.stdout_lines | length > 0

 # PgBouncer (if 'pgbouncer_pool_pause' is 'true')
-# test access via localhost to be able to perform 'PAUSE' command
-- name: '[Pre-Check] Test PgBouncer access via localhost'
-  ansible.builtin.command: >-
-    psql -h localhost
-    -p {{ pgbouncer_listen_port }}
-    -U {{ patroni_superuser_username }}
-    -d pgbouncer
-    -tAXc "SHOW POOLS"
-  changed_when: false
-  environment:
-    PGPASSWORD: "{{ patroni_superuser_password }}"
-  when:
-    - pgbouncer_install | bool
-    - pgbouncer_pool_pause | bool
-    - pgbouncer_processes | default(1) | int == 1
-
+# test access via unix socket to be able to perform 'PAUSE' command
 - name: '[Pre-Check] Test PgBouncer access via unix socket'
   ansible.builtin.command: >-
     psql -h /var/run/pgbouncer{{ '-%d' % (idx + 1) if idx > 0 else '' }}
@@ -362,12 +347,9 @@
     index_var: idx
     label: "{{ 'pgbouncer' if idx == 0 else 'pgbouncer-%d' % (idx + 1) }}"
   changed_when: false
-  environment:
-    PGPASSWORD: "{{ patroni_superuser_password }}"
   when:
     - pgbouncer_install | bool
     - pgbouncer_pool_pause | bool
-    - pgbouncer_processes | default(1) | int > 1

 # Check the VIP address
 - name: Make sure that the cluster ip address (VIP) "{{ cluster_vip }}" is running
diff --git a/vars/main.yml b/vars/main.yml
index ca2e4268a..46198ea5c 100644
--- a/vars/main.yml
+++ b/vars/main.yml
@@ -289,8 +289,8 @@ postgresql_pg_ident: []
 # the password file (~/.pgpass)
 postgresql_pgpass:
   - "localhost:{{ postgresql_port }}:*:{{ patroni_superuser_username }}:{{ patroni_superuser_password }}"
-  - "localhost:{{ pgbouncer_listen_port }}:*:{{ patroni_superuser_username }}:{{ patroni_superuser_password }}"
   - "{{ inventory_hostname }}:{{ postgresql_port }}:*:{{ patroni_superuser_username }}:{{ patroni_superuser_password }}"
+  - "*:{{ pgbouncer_listen_port }}:*:{{ patroni_superuser_username }}:{{ patroni_superuser_password }}"
 #  - hostname:port:database:username:password

