From 53f68c6f0d6365be5dc9fdc8a538931fba675b25 Mon Sep 17 00:00:00 2001 From: Vitaliy Kukharik Date: Fri, 20 Sep 2024 13:27:22 +0300 Subject: [PATCH 01/28] Add cluster restore timeout Add patroni_cluster_restore_timeout variable --- automation/roles/patroni/tasks/main.yml | 18 +++++++++--------- automation/vars/main.yml | 1 + 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/automation/roles/patroni/tasks/main.yml b/automation/roles/patroni/tasks/main.yml index 123945fea..dc100d38f 100644 --- a/automation/roles/patroni/tasks/main.yml +++ b/automation/roles/patroni/tasks/main.yml @@ -648,7 +648,7 @@ ansible.builtin.command: > {{ pgbackrest_patroni_cluster_restore_command }} {{ '--target-action=promote' if pgbackrest_patroni_cluster_restore_command is search('--type=') else '' }} - async: 86400 # timeout 24 hours + async: "{{ patroni_cluster_restore_timeout | default(86400) }}" # timeout 24 hours poll: 0 register: pgbackrest_restore_master when: is_master | bool @@ -658,7 +658,7 @@ ansible.builtin.command: > {{ pgbackrest_patroni_cluster_restore_command }} {{ '--target-action=shutdown' if pgbackrest_patroni_cluster_restore_command is search('--type=') else '' }} - async: 86400 # timeout 24 hours + async: "{{ patroni_cluster_restore_timeout | default(86400) }}" # timeout 24 hours poll: 0 register: pgbackrest_restore_replica when: not is_master | bool and 'pgbackrest' in patroni_create_replica_methods @@ -673,7 +673,7 @@ label: "{{ item.changed }}" register: pgbackrest_restore_jobs_result until: pgbackrest_restore_jobs_result.finished - retries: 2880 # timeout 24 hours + retries: "{{ (patroni_cluster_restore_timeout | default(86400)) | int // 30 }}" # timeout 24 hours delay: 30 when: item.ansible_job_id is defined @@ -693,8 +693,8 @@ {{ postgresql_bin_dir }}/psql -p {{ postgresql_port }} -U {{ patroni_superuser_username }} -d postgres -tAXc "select pg_is_in_recovery()" register: pg_is_in_recovery - until: pg_is_in_recovery.stdout != "t" - retries: 1200 # timeout 10 hours + until: pg_is_in_recovery.stdout == "f" + retries: "{{ (patroni_cluster_restore_timeout | default(86400)) | int // 30 }}" # timeout 24 hours delay: 30 changed_when: false failed_when: false @@ -791,7 +791,7 @@ "select pg_is_in_recovery()" register: pg_is_in_recovery until: pg_is_in_recovery.stdout == "f" - retries: 1200 # timeout 10 hours + retries: "{{ (patroni_cluster_restore_timeout | default(86400)) | int // 30 }}" # timeout 24 hours delay: 30 changed_when: false failed_when: false @@ -853,8 +853,8 @@ {{ postgresql_bin_dir }}/psql -p {{ postgresql_port }} -U {{ patroni_superuser_username }} -d postgres -tAXc "select pg_is_in_recovery()" register: pg_is_in_recovery - until: pg_is_in_recovery.stdout != "t" - retries: 1200 # timeout 10 hours + until: pg_is_in_recovery.stdout == "f" + retries: "{{ (patroni_cluster_restore_timeout | default(86400)) | int // 30 }}" # timeout 24 hours delay: 30 changed_when: false when: is_master | bool @@ -961,7 +961,7 @@ status_code: 200 register: replica_result until: replica_result.status == 200 - retries: 1200 # timeout 10 hours + retries: "{{ (patroni_cluster_restore_timeout | default(86400)) | int // 30 }}" # timeout 24 hours delay: 30 environment: no_proxy: "{{ inventory_hostname }}" diff --git a/automation/vars/main.yml b/automation/vars/main.yml index 267a88229..7a2a0e78c 100644 --- a/automation/vars/main.yml +++ b/automation/vars/main.yml @@ -644,6 +644,7 @@ pgbackrest_cron_jobs: # PITR mode (if patroni_cluster_bootstrap_method: "pgbackrest" or "wal-g"): # 1) The database cluster directory will be cleaned (for "wal-g") or overwritten (for "pgbackrest" --delta restore). # 2) And also the patroni cluster "{{ patroni_cluster_name }}" will be removed from the DCS (if exist) before recovery. +patroni_cluster_restore_timeout: 86400 # backup and WAL restore timeout in seconds (24 hours) disable_archive_command: true # or 'false' to not disable archive_command after restore keep_patroni_dynamic_json: true # or 'false' to remove patroni.dynamic.json after restore (if exists) From 9770ee0e132ab7ed58cb51acecbd8d0edd70c08f Mon Sep 17 00:00:00 2001 From: Vitaliy Kukharik Date: Fri, 20 Sep 2024 13:32:46 +0300 Subject: [PATCH 02/28] Add pg_ctl_timeout variable --- automation/roles/patroni/tasks/main.yml | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/automation/roles/patroni/tasks/main.yml b/automation/roles/patroni/tasks/main.yml index dc100d38f..13dcea748 100644 --- a/automation/roles/patroni/tasks/main.yml +++ b/automation/roles/patroni/tasks/main.yml @@ -507,7 +507,7 @@ - name: Prepare PostgreSQL | start PostgreSQL on Master become: true become_user: postgres - ansible.builtin.command: "{{ postgresql_bin_dir }}/pg_ctl start -D {{ postgresql_data_dir }} -w -t 1800" + ansible.builtin.command: "{{ postgresql_bin_dir }}/pg_ctl start -D {{ postgresql_data_dir }} -w -t {{ pg_ctl_timeout | default(3600) }}" when: pg_ctl_status_result.rc == 3 - name: Prepare PostgreSQL | check PostgreSQL is accepting connections @@ -584,7 +584,7 @@ - name: Prepare PostgreSQL | stop PostgreSQL (will be managed by patroni) become: true become_user: postgres - ansible.builtin.command: "{{ postgresql_bin_dir }}/pg_ctl stop -D {{ postgresql_data_dir }} -m fast -w -t 1800" + ansible.builtin.command: "{{ postgresql_bin_dir }}/pg_ctl stop -D {{ postgresql_data_dir }} -m fast -w -t {{ pg_ctl_timeout | default(3600) }}" when: checkpoint_result.rc is defined and checkpoint_result.rc == 0 - name: Prepare PostgreSQL | check PostgreSQL is stopped @@ -685,7 +685,8 @@ when: not keep_patroni_dynamic_json|bool - name: Start PostgreSQL for Recovery - ansible.builtin.command: "{{ postgresql_bin_dir }}/pg_ctl start -D {{ postgresql_data_dir }} -o '-c hot_standby=off' -w -t 1800" + ansible.builtin.command: >- + "{{ postgresql_bin_dir }}/pg_ctl start -D {{ postgresql_data_dir }} -o '-c hot_standby=off' -w -t {{ pg_ctl_timeout | default(3600) }}" when: is_master | bool or (not is_master | bool and 'pgbackrest' in patroni_create_replica_methods) - name: Waiting for PostgreSQL Recovery to complete (WAL apply) @@ -707,7 +708,7 @@ failed_when: false - name: Stop PostgreSQL - ansible.builtin.command: "{{ postgresql_bin_dir }}/pg_ctl stop -D {{ postgresql_data_dir }} -m fast -w -t 1800" + ansible.builtin.command: "{{ postgresql_bin_dir }}/pg_ctl stop -D {{ postgresql_data_dir }} -m fast -w -t {{ pg_ctl_timeout | default(3600) }}" when: pg_ctl_status_result.rc is defined and (pg_ctl_status_result.rc != 3 and pg_ctl_status_result.rc != 4) when: patroni_cluster_bootstrap_method == "pgbackrest" become: true From b9a64c4bee714ee16185ed5ee54269bb9d16f03c Mon Sep 17 00:00:00 2001 From: Vitaliy Kukharik Date: Fri, 20 Sep 2024 13:51:26 +0300 Subject: [PATCH 03/28] Add Standby cluster initialization check --- automation/roles/patroni/tasks/main.yml | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/automation/roles/patroni/tasks/main.yml b/automation/roles/patroni/tasks/main.yml index 13dcea748..1e73858ec 100644 --- a/automation/roles/patroni/tasks/main.yml +++ b/automation/roles/patroni/tasks/main.yml @@ -798,7 +798,21 @@ failed_when: false when: patroni_cluster_bootstrap_method == "wal-g" - - name: Check PostgreSQL is started and accepting connections on Master + - name: Wait for the Standby cluster initialization to complete + ansible.builtin.uri: + url: "http://{{ inventory_hostname }}:{{ patroni_restapi_port }}/standby-leader" + status_code: 200 + register: standby_leader_result + until: standby_leader_result.status_code == 200 + retries: "{{ (patroni_cluster_restore_timeout | default(86400)) | int // 30 }}" # timeout 24 hours + delay: 30 + environment: + no_proxy: "{{ inventory_hostname }}" + when: + - (patroni_standby_cluster.host is defined and patroni_standby_cluster.host | length > 0) + - not ansible_check_mode + + - name: Check PostgreSQL is started and accepting connections become: true become_user: postgres ansible.builtin.command: "{{ postgresql_bin_dir }}/pg_isready -p {{ postgresql_port }}" From 6b3a050f39b69d20bd34d5a2b76a3b9df11f9a27 Mon Sep 17 00:00:00 2001 From: Vitaliy Kukharik Date: Mon, 23 Sep 2024 12:59:46 +0300 Subject: [PATCH 04/28] cluster_restore_timeout Rename patroni_cluster_restore_timeout to cluster_restore_timeout to make the variable shorter. --- automation/roles/patroni/tasks/main.yml | 16 ++++++++-------- automation/vars/main.yml | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/automation/roles/patroni/tasks/main.yml b/automation/roles/patroni/tasks/main.yml index 1e73858ec..c9e0d4903 100644 --- a/automation/roles/patroni/tasks/main.yml +++ b/automation/roles/patroni/tasks/main.yml @@ -648,7 +648,7 @@ ansible.builtin.command: > {{ pgbackrest_patroni_cluster_restore_command }} {{ '--target-action=promote' if pgbackrest_patroni_cluster_restore_command is search('--type=') else '' }} - async: "{{ patroni_cluster_restore_timeout | default(86400) }}" # timeout 24 hours + async: "{{ cluster_restore_timeout | default(86400) }}" # timeout 24 hours poll: 0 register: pgbackrest_restore_master when: is_master | bool @@ -658,7 +658,7 @@ ansible.builtin.command: > {{ pgbackrest_patroni_cluster_restore_command }} {{ '--target-action=shutdown' if pgbackrest_patroni_cluster_restore_command is search('--type=') else '' }} - async: "{{ patroni_cluster_restore_timeout | default(86400) }}" # timeout 24 hours + async: "{{ cluster_restore_timeout | default(86400) }}" # timeout 24 hours poll: 0 register: pgbackrest_restore_replica when: not is_master | bool and 'pgbackrest' in patroni_create_replica_methods @@ -673,7 +673,7 @@ label: "{{ item.changed }}" register: pgbackrest_restore_jobs_result until: pgbackrest_restore_jobs_result.finished - retries: "{{ (patroni_cluster_restore_timeout | default(86400)) | int // 30 }}" # timeout 24 hours + retries: "{{ (cluster_restore_timeout | default(86400)) | int // 30 }}" # timeout 24 hours delay: 30 when: item.ansible_job_id is defined @@ -695,7 +695,7 @@ "select pg_is_in_recovery()" register: pg_is_in_recovery until: pg_is_in_recovery.stdout == "f" - retries: "{{ (patroni_cluster_restore_timeout | default(86400)) | int // 30 }}" # timeout 24 hours + retries: "{{ (cluster_restore_timeout | default(86400)) | int // 30 }}" # timeout 24 hours delay: 30 changed_when: false failed_when: false @@ -792,7 +792,7 @@ "select pg_is_in_recovery()" register: pg_is_in_recovery until: pg_is_in_recovery.stdout == "f" - retries: "{{ (patroni_cluster_restore_timeout | default(86400)) | int // 30 }}" # timeout 24 hours + retries: "{{ (cluster_restore_timeout | default(86400)) | int // 30 }}" # timeout 24 hours delay: 30 changed_when: false failed_when: false @@ -804,7 +804,7 @@ status_code: 200 register: standby_leader_result until: standby_leader_result.status_code == 200 - retries: "{{ (patroni_cluster_restore_timeout | default(86400)) | int // 30 }}" # timeout 24 hours + retries: "{{ (cluster_restore_timeout | default(86400)) | int // 30 }}" # timeout 24 hours delay: 30 environment: no_proxy: "{{ inventory_hostname }}" @@ -869,7 +869,7 @@ "select pg_is_in_recovery()" register: pg_is_in_recovery until: pg_is_in_recovery.stdout == "f" - retries: "{{ (patroni_cluster_restore_timeout | default(86400)) | int // 30 }}" # timeout 24 hours + retries: "{{ (cluster_restore_timeout | default(86400)) | int // 30 }}" # timeout 24 hours delay: 30 changed_when: false when: is_master | bool @@ -976,7 +976,7 @@ status_code: 200 register: replica_result until: replica_result.status == 200 - retries: "{{ (patroni_cluster_restore_timeout | default(86400)) | int // 30 }}" # timeout 24 hours + retries: "{{ (cluster_restore_timeout | default(86400)) | int // 30 }}" # timeout 24 hours delay: 30 environment: no_proxy: "{{ inventory_hostname }}" diff --git a/automation/vars/main.yml b/automation/vars/main.yml index 7a2a0e78c..831bc92e0 100644 --- a/automation/vars/main.yml +++ b/automation/vars/main.yml @@ -644,7 +644,7 @@ pgbackrest_cron_jobs: # PITR mode (if patroni_cluster_bootstrap_method: "pgbackrest" or "wal-g"): # 1) The database cluster directory will be cleaned (for "wal-g") or overwritten (for "pgbackrest" --delta restore). # 2) And also the patroni cluster "{{ patroni_cluster_name }}" will be removed from the DCS (if exist) before recovery. -patroni_cluster_restore_timeout: 86400 # backup and WAL restore timeout in seconds (24 hours) +cluster_restore_timeout: 86400 # backup and WAL restore timeout in seconds (24 hours) disable_archive_command: true # or 'false' to not disable archive_command after restore keep_patroni_dynamic_json: true # or 'false' to remove patroni.dynamic.json after restore (if exists) From 59df9d03ba113d2960f82fac7a238fe0e7cee86d Mon Sep 17 00:00:00 2001 From: Vitaliy Kukharik Date: Mon, 30 Sep 2024 11:12:51 +0300 Subject: [PATCH 05/28] remove duplicate pgbackrest vars from the inventory --- automation/inventory | 4 ---- 1 file changed, 4 deletions(-) diff --git a/automation/inventory b/automation/inventory index 9f7628657..17bbf3980 100644 --- a/automation/inventory +++ b/automation/inventory @@ -61,7 +61,3 @@ ansible_ssh_port='22' #ansible_ssh_pass='secretpassword' # "sshpass" package is required for use "ansible_ssh_pass" #ansible_ssh_private_key_file= #ansible_python_interpreter='/usr/bin/python3' - -[pgbackrest:vars] -#ansible_user='postgres' -#ansible_ssh_pass='secretpassword' From dd786eec8b6ab3922b3ab0c00144bd32d40d65aa Mon Sep 17 00:00:00 2001 From: Vitaliy Kukharik Date: Mon, 30 Sep 2024 12:48:07 +0300 Subject: [PATCH 06/28] update task "Start PostgreSQL for Recovery" --- automation/roles/patroni/tasks/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/automation/roles/patroni/tasks/main.yml b/automation/roles/patroni/tasks/main.yml index c9e0d4903..e472a6201 100644 --- a/automation/roles/patroni/tasks/main.yml +++ b/automation/roles/patroni/tasks/main.yml @@ -686,7 +686,7 @@ - name: Start PostgreSQL for Recovery ansible.builtin.command: >- - "{{ postgresql_bin_dir }}/pg_ctl start -D {{ postgresql_data_dir }} -o '-c hot_standby=off' -w -t {{ pg_ctl_timeout | default(3600) }}" + {{ postgresql_bin_dir }}/pg_ctl start -D {{ postgresql_data_dir }} -o '-c hot_standby=off' -w -t {{ pg_ctl_timeout | default(3600) }} when: is_master | bool or (not is_master | bool and 'pgbackrest' in patroni_create_replica_methods) - name: Waiting for PostgreSQL Recovery to complete (WAL apply) From 9d1079a7f01618bb2b828b86c3aa2e3ad5555c95 Mon Sep 17 00:00:00 2001 From: Vitaliy Kukharik Date: Mon, 30 Sep 2024 10:13:31 +0000 Subject: [PATCH 07/28] temporarily disable tests for Citus (repository problem: Error 402) --- automation/molecule/default/converge.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/automation/molecule/default/converge.yml b/automation/molecule/default/converge.yml index 27248f77a..50f74f401 100644 --- a/automation/molecule/default/converge.yml +++ b/automation/molecule/default/converge.yml @@ -60,7 +60,7 @@ enable_pg_stat_kcache: true enable_pg_wait_sampling: true enable_pg_partman: true - enable_citus: "{{ 'false' if ansible_distribution_version == '24.04' else 'true' }}" # TODO Ubuntu 24.04 + enable_citus: false # "{{ 'false' if ansible_distribution_version == '24.04' else 'true' }}" # TODO Ubuntu 24.04 enable_paradedb: "{{ 'false' if ansible_distribution_release == 'bullseye' else 'true' }}" # pg_search and pg_analytics (no packages for debian 11) enable_pgvectorscale: "{{ 'true' if ansible_distribution_release in ['bookworm', 'jammy', 'noble'] else 'false' }}" # only deb packages are available # create extension From c13fe1cea4f4cb5b6cbad1a1a5182312fd5aaedf Mon Sep 17 00:00:00 2001 From: Vitaliy Kukharik Date: Mon, 30 Sep 2024 17:55:26 +0300 Subject: [PATCH 08/28] fix typo --- automation/roles/patroni/tasks/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/automation/roles/patroni/tasks/main.yml b/automation/roles/patroni/tasks/main.yml index e472a6201..d3d966f7a 100644 --- a/automation/roles/patroni/tasks/main.yml +++ b/automation/roles/patroni/tasks/main.yml @@ -803,7 +803,7 @@ url: "http://{{ inventory_hostname }}:{{ patroni_restapi_port }}/standby-leader" status_code: 200 register: standby_leader_result - until: standby_leader_result.status_code == 200 + until: standby_leader_result.status == 200 retries: "{{ (cluster_restore_timeout | default(86400)) | int // 30 }}" # timeout 24 hours delay: 30 environment: From e53e0f2552e8264f4770eef9e97cad31afe42d26 Mon Sep 17 00:00:00 2001 From: Vitaliy Kukharik Date: Tue, 1 Oct 2024 12:21:40 +0300 Subject: [PATCH 09/28] enable citus tests --- automation/molecule/default/converge.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/automation/molecule/default/converge.yml b/automation/molecule/default/converge.yml index 50f74f401..27248f77a 100644 --- a/automation/molecule/default/converge.yml +++ b/automation/molecule/default/converge.yml @@ -60,7 +60,7 @@ enable_pg_stat_kcache: true enable_pg_wait_sampling: true enable_pg_partman: true - enable_citus: false # "{{ 'false' if ansible_distribution_version == '24.04' else 'true' }}" # TODO Ubuntu 24.04 + enable_citus: "{{ 'false' if ansible_distribution_version == '24.04' else 'true' }}" # TODO Ubuntu 24.04 enable_paradedb: "{{ 'false' if ansible_distribution_release == 'bullseye' else 'true' }}" # pg_search and pg_analytics (no packages for debian 11) enable_pgvectorscale: "{{ 'true' if ansible_distribution_release in ['bookworm', 'jammy', 'noble'] else 'false' }}" # only deb packages are available # create extension From f95cbfdbc3a0802da9a6feef52209f99838e43ad Mon Sep 17 00:00:00 2001 From: Vitaliy Kukharik Date: Wed, 2 Oct 2024 14:41:42 +0300 Subject: [PATCH 10/28] Update main.yml --- automation/roles/patroni/tasks/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/automation/roles/patroni/tasks/main.yml b/automation/roles/patroni/tasks/main.yml index d3d966f7a..048eecb69 100644 --- a/automation/roles/patroni/tasks/main.yml +++ b/automation/roles/patroni/tasks/main.yml @@ -699,7 +699,7 @@ delay: 30 changed_when: false failed_when: false - when: is_master | bool or (not is_master | bool and 'pgbackrest' in patroni_create_replica_methods) + when: is_master | bool - name: Check that PostgreSQL is stopped ansible.builtin.command: "{{ postgresql_bin_dir }}/pg_ctl status -D {{ postgresql_data_dir }}" From 5bac0cd4e32434021a493d51cd15cbde8ac20629 Mon Sep 17 00:00:00 2001 From: Vitaliy Kukharik Date: Wed, 2 Oct 2024 14:43:03 +0300 Subject: [PATCH 11/28] Update main.yml --- automation/vars/main.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/automation/vars/main.yml b/automation/vars/main.yml index 831bc92e0..b3b24edbe 100644 --- a/automation/vars/main.yml +++ b/automation/vars/main.yml @@ -460,11 +460,11 @@ patroni_create_replica_methods: - basebackup pgbackrest: - - { option: "command", value: "/usr/bin/pgbackrest --stanza={{ pgbackrest_stanza }} --delta restore" } + - { option: "command", value: "{{ pgbackrest_patroni_cluster_restore_command }}" } - { option: "keep_data", value: "True" } - { option: "no_params", value: "True" } wal_g: - - { option: "command", value: "{{ wal_g_path }} backup-fetch {{ postgresql_data_dir }} LATEST" } + - { option: "command", value: "{{ wal_g_patroni_cluster_bootstrap_command }}" } - { option: "no_params", value: "True" } basebackup: - { option: "max-rate", value: "1000M" } From a41ca81a46498c4e1443e8d90b0bae90f56bebe5 Mon Sep 17 00:00:00 2001 From: Vitaliy Kukharik Date: Wed, 2 Oct 2024 14:50:21 +0300 Subject: [PATCH 12/28] formatting --- automation/roles/patroni/tasks/main.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/automation/roles/patroni/tasks/main.yml b/automation/roles/patroni/tasks/main.yml index 048eecb69..907a62e52 100644 --- a/automation/roles/patroni/tasks/main.yml +++ b/automation/roles/patroni/tasks/main.yml @@ -691,8 +691,7 @@ - name: Waiting for PostgreSQL Recovery to complete (WAL apply) ansible.builtin.command: >- - {{ postgresql_bin_dir }}/psql -p {{ postgresql_port }} -U {{ patroni_superuser_username }} -d postgres -tAXc - "select pg_is_in_recovery()" + {{ postgresql_bin_dir }}/psql -p {{ postgresql_port }} -U {{ patroni_superuser_username }} -d postgres -tAXc "select pg_is_in_recovery()" register: pg_is_in_recovery until: pg_is_in_recovery.stdout == "f" retries: "{{ (cluster_restore_timeout | default(86400)) | int // 30 }}" # timeout 24 hours From c1769f40c92e2db690f85d86ee232d2ef62930aa Mon Sep 17 00:00:00 2001 From: Vitaliy Kukharik Date: Wed, 2 Oct 2024 15:15:01 +0300 Subject: [PATCH 13/28] Check PostgreSQL is started --- automation/roles/patroni/tasks/main.yml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/automation/roles/patroni/tasks/main.yml b/automation/roles/patroni/tasks/main.yml index 907a62e52..7bde17d85 100644 --- a/automation/roles/patroni/tasks/main.yml +++ b/automation/roles/patroni/tasks/main.yml @@ -689,6 +689,15 @@ {{ postgresql_bin_dir }}/pg_ctl start -D {{ postgresql_data_dir }} -o '-c hot_standby=off' -w -t {{ pg_ctl_timeout | default(3600) }} when: is_master | bool or (not is_master | bool and 'pgbackrest' in patroni_create_replica_methods) + - name: Check PostgreSQL is started and accepting connections + ansible.builtin.command: "{{ postgresql_bin_dir }}/pg_isready -p {{ postgresql_port }}" + register: pg_isready_result + until: pg_isready_result.rc == 0 + retries: 3 + delay: 10 + changed_when: false + when: is_master | bool or (not is_master | bool and 'pgbackrest' in patroni_create_replica_methods) + - name: Waiting for PostgreSQL Recovery to complete (WAL apply) ansible.builtin.command: >- {{ postgresql_bin_dir }}/psql -p {{ postgresql_port }} -U {{ patroni_superuser_username }} -d postgres -tAXc "select pg_is_in_recovery()" From e7524f05c625206eb2d04e8459fe9a56cb873192 Mon Sep 17 00:00:00 2001 From: Vitaliy Kukharik Date: Wed, 2 Oct 2024 15:18:59 +0300 Subject: [PATCH 14/28] Update main.yml --- automation/roles/patroni/tasks/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/automation/roles/patroni/tasks/main.yml b/automation/roles/patroni/tasks/main.yml index 7bde17d85..cfe05c03c 100644 --- a/automation/roles/patroni/tasks/main.yml +++ b/automation/roles/patroni/tasks/main.yml @@ -696,7 +696,7 @@ retries: 3 delay: 10 changed_when: false - when: is_master | bool or (not is_master | bool and 'pgbackrest' in patroni_create_replica_methods) + when: is_master | bool - name: Waiting for PostgreSQL Recovery to complete (WAL apply) ansible.builtin.command: >- From 54ea8b85921654c96d98ef4ba5b49d676fce6296 Mon Sep 17 00:00:00 2001 From: Vitaliy Kukharik Date: Wed, 2 Oct 2024 15:48:38 +0300 Subject: [PATCH 15/28] Update main.yml --- automation/roles/patroni/tasks/main.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/automation/roles/patroni/tasks/main.yml b/automation/roles/patroni/tasks/main.yml index cfe05c03c..a0fd43044 100644 --- a/automation/roles/patroni/tasks/main.yml +++ b/automation/roles/patroni/tasks/main.yml @@ -685,15 +685,17 @@ when: not keep_patroni_dynamic_json|bool - name: Start PostgreSQL for Recovery - ansible.builtin.command: >- + ansible.builtin.shell: > {{ postgresql_bin_dir }}/pg_ctl start -D {{ postgresql_data_dir }} -o '-c hot_standby=off' -w -t {{ pg_ctl_timeout | default(3600) }} + args: + executable: /bin/bash when: is_master | bool or (not is_master | bool and 'pgbackrest' in patroni_create_replica_methods) - name: Check PostgreSQL is started and accepting connections ansible.builtin.command: "{{ postgresql_bin_dir }}/pg_isready -p {{ postgresql_port }}" register: pg_isready_result until: pg_isready_result.rc == 0 - retries: 3 + retries: 30 delay: 10 changed_when: false when: is_master | bool From c4a30899f77cc95d9782aef55f2c09680ebab55c Mon Sep 17 00:00:00 2001 From: Vitaliy Kukharik Date: Mon, 21 Oct 2024 14:28:04 +0300 Subject: [PATCH 16/28] Update main.yml --- automation/roles/patroni/tasks/main.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/automation/roles/patroni/tasks/main.yml b/automation/roles/patroni/tasks/main.yml index a0fd43044..558a78798 100644 --- a/automation/roles/patroni/tasks/main.yml +++ b/automation/roles/patroni/tasks/main.yml @@ -685,10 +685,8 @@ when: not keep_patroni_dynamic_json|bool - name: Start PostgreSQL for Recovery - ansible.builtin.shell: > + ansible.builtin.command: >- {{ postgresql_bin_dir }}/pg_ctl start -D {{ postgresql_data_dir }} -o '-c hot_standby=off' -w -t {{ pg_ctl_timeout | default(3600) }} - args: - executable: /bin/bash when: is_master | bool or (not is_master | bool and 'pgbackrest' in patroni_create_replica_methods) - name: Check PostgreSQL is started and accepting connections From 372be89caced54c96a47b5399a126830591ced27 Mon Sep 17 00:00:00 2001 From: Vitaliy Kukharik Date: Fri, 1 Nov 2024 14:51:53 +0300 Subject: [PATCH 17/28] Update pg_ctl start command --- automation/roles/patroni/tasks/main.yml | 30 ++++++++++++++++--------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/automation/roles/patroni/tasks/main.yml b/automation/roles/patroni/tasks/main.yml index 558a78798..c63730018 100644 --- a/automation/roles/patroni/tasks/main.yml +++ b/automation/roles/patroni/tasks/main.yml @@ -686,21 +686,28 @@ - name: Start PostgreSQL for Recovery ansible.builtin.command: >- - {{ postgresql_bin_dir }}/pg_ctl start -D {{ postgresql_data_dir }} -o '-c hot_standby=off' -w -t {{ pg_ctl_timeout | default(3600) }} + {{ postgresql_bin_dir }}/pg_ctl start -D {{ postgresql_data_dir }} -w -t {{ pg_ctl_timeout | default(3600) }} + -o '--config-file={{ postgresql_conf_dir }}/postgresql.conf' + -o '-c hot_standby=off' + -l /tmp/pg_recovery_{{ ansible_date_time.date }}.log + async: "{{ pg_ctl_timeout | default(3600) }}" # run the command asynchronously + poll: 0 + register: pg_ctl_start_result when: is_master | bool or (not is_master | bool and 'pgbackrest' in patroni_create_replica_methods) - - name: Check PostgreSQL is started and accepting connections - ansible.builtin.command: "{{ postgresql_bin_dir }}/pg_isready -p {{ postgresql_port }}" - register: pg_isready_result - until: pg_isready_result.rc == 0 - retries: 30 + - name: Wait for the PostgreSQL start command to complete + ansible.builtin.async_status: + jid: "{{ pg_ctl_start_result.ansible_job_id }}" + register: pg_ctl_start_job_result + until: pg_ctl_start_job_result.finished + retries: "{{ (pg_ctl_timeout | default(3600) | int) // 10 }}" delay: 10 - changed_when: false - when: is_master | bool + when: is_master | bool or (not is_master | bool and 'pgbackrest' in patroni_create_replica_methods) - - name: Waiting for PostgreSQL Recovery to complete (WAL apply) + - name: Wait for PostgreSQL Recovery to complete (WAL apply) ansible.builtin.command: >- - {{ postgresql_bin_dir }}/psql -p {{ postgresql_port }} -U {{ patroni_superuser_username }} -d postgres -tAXc "select pg_is_in_recovery()" + {{ postgresql_bin_dir }}/psql -p {{ postgresql_port }} -U {{ patroni_superuser_username }} -d postgres + -tAXc "select pg_is_in_recovery()" register: pg_is_in_recovery until: pg_is_in_recovery.stdout == "f" retries: "{{ (cluster_restore_timeout | default(86400)) | int // 30 }}" # timeout 24 hours @@ -716,7 +723,8 @@ failed_when: false - name: Stop PostgreSQL - ansible.builtin.command: "{{ postgresql_bin_dir }}/pg_ctl stop -D {{ postgresql_data_dir }} -m fast -w -t {{ pg_ctl_timeout | default(3600) }}" + ansible.builtin.command: >- + {{ postgresql_bin_dir }}/pg_ctl stop -D {{ postgresql_data_dir }} -m fast -w -t {{ pg_ctl_timeout | default(3600) }} when: pg_ctl_status_result.rc is defined and (pg_ctl_status_result.rc != 3 and pg_ctl_status_result.rc != 4) when: patroni_cluster_bootstrap_method == "pgbackrest" become: true From 4048b3992389e953f3071ad32a47dccfbf4dcc0a Mon Sep 17 00:00:00 2001 From: Vitaliy Kukharik Date: Fri, 1 Nov 2024 14:54:33 +0300 Subject: [PATCH 18/28] Check that PostgreSQL is stopped --- automation/roles/patroni/tasks/main.yml | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/automation/roles/patroni/tasks/main.yml b/automation/roles/patroni/tasks/main.yml index c63730018..a1a815f44 100644 --- a/automation/roles/patroni/tasks/main.yml +++ b/automation/roles/patroni/tasks/main.yml @@ -625,6 +625,17 @@ state: stopped when: is_master | bool + - name: Check that PostgreSQL is stopped + ansible.builtin.command: "{{ postgresql_bin_dir }}/pg_ctl status -D {{ postgresql_data_dir }}" + register: pg_ctl_status_result + changed_when: false + failed_when: false + + - name: Stop PostgreSQL + ansible.builtin.command: >- + {{ postgresql_bin_dir }}/pg_ctl stop -D {{ postgresql_data_dir }} -m fast -w -t {{ pg_ctl_timeout | default(3600) }} + when: pg_ctl_status_result.rc is defined and (pg_ctl_status_result.rc != 3 and pg_ctl_status_result.rc != 4) + - name: Remove patroni cluster "{{ patroni_cluster_name }}" from DCS (if exist) become: true become_user: postgres From 16eebaaf0b809ca3220ef3fdba034a4e9cc37b7d Mon Sep 17 00:00:00 2001 From: Vitaliy Kukharik Date: Fri, 1 Nov 2024 14:58:38 +0300 Subject: [PATCH 19/28] disable archive_command after recovery --- automation/roles/patroni/tasks/main.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/automation/roles/patroni/tasks/main.yml b/automation/roles/patroni/tasks/main.yml index a1a815f44..e01cb769f 100644 --- a/automation/roles/patroni/tasks/main.yml +++ b/automation/roles/patroni/tasks/main.yml @@ -700,6 +700,7 @@ {{ postgresql_bin_dir }}/pg_ctl start -D {{ postgresql_data_dir }} -w -t {{ pg_ctl_timeout | default(3600) }} -o '--config-file={{ postgresql_conf_dir }}/postgresql.conf' -o '-c hot_standby=off' + -0 '-c archive_command=/bin/true' -l /tmp/pg_recovery_{{ ansible_date_time.date }}.log async: "{{ pg_ctl_timeout | default(3600) }}" # run the command asynchronously poll: 0 From 83518f825ef8f3fe4657d6e9cbb55a198d7d26c7 Mon Sep 17 00:00:00 2001 From: Vitaliy Kukharik Date: Fri, 1 Nov 2024 15:00:22 +0300 Subject: [PATCH 20/28] fix typo --- automation/roles/patroni/tasks/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/automation/roles/patroni/tasks/main.yml b/automation/roles/patroni/tasks/main.yml index e01cb769f..b905b5e36 100644 --- a/automation/roles/patroni/tasks/main.yml +++ b/automation/roles/patroni/tasks/main.yml @@ -700,7 +700,7 @@ {{ postgresql_bin_dir }}/pg_ctl start -D {{ postgresql_data_dir }} -w -t {{ pg_ctl_timeout | default(3600) }} -o '--config-file={{ postgresql_conf_dir }}/postgresql.conf' -o '-c hot_standby=off' - -0 '-c archive_command=/bin/true' + -o '-c archive_command=/bin/true' -l /tmp/pg_recovery_{{ ansible_date_time.date }}.log async: "{{ pg_ctl_timeout | default(3600) }}" # run the command asynchronously poll: 0 From d72b41e58ced3ab67d040b9a0b5e1f8b8b1fec83 Mon Sep 17 00:00:00 2001 From: Vitaliy Kukharik Date: Fri, 1 Nov 2024 15:04:51 +0300 Subject: [PATCH 21/28] add restore_command --- automation/roles/patroni/tasks/main.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/automation/roles/patroni/tasks/main.yml b/automation/roles/patroni/tasks/main.yml index b905b5e36..51b8205c0 100644 --- a/automation/roles/patroni/tasks/main.yml +++ b/automation/roles/patroni/tasks/main.yml @@ -700,6 +700,9 @@ {{ postgresql_bin_dir }}/pg_ctl start -D {{ postgresql_data_dir }} -w -t {{ pg_ctl_timeout | default(3600) }} -o '--config-file={{ postgresql_conf_dir }}/postgresql.conf' -o '-c hot_standby=off' + {% if postgresql_version | int >= 12 %} + -o '-c restore_command="pgbackrest --stanza={{ pgbackrest_stanza }} archive-get %f %p"' + {% endif %} -o '-c archive_command=/bin/true' -l /tmp/pg_recovery_{{ ansible_date_time.date }}.log async: "{{ pg_ctl_timeout | default(3600) }}" # run the command asynchronously From d5efcaebfc5a1555bdcf145e09799172ea939e69 Mon Sep 17 00:00:00 2001 From: Vitaliy Kukharik Date: Fri, 1 Nov 2024 15:29:58 +0300 Subject: [PATCH 22/28] Update main.yml --- automation/roles/patroni/tasks/main.yml | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/automation/roles/patroni/tasks/main.yml b/automation/roles/patroni/tasks/main.yml index 51b8205c0..ec1a27348 100644 --- a/automation/roles/patroni/tasks/main.yml +++ b/automation/roles/patroni/tasks/main.yml @@ -598,7 +598,7 @@ tags: patroni, patroni_start_master - block: # PITR (custom bootstrap) - # Prepare (install pexpect, ruamel.yaml) + # Prepare (install pexpect, ruamel.yaml) - name: Prepare | Make sure the ansible required python library is exist ansible.builtin.pip: name: "{{ item }}" @@ -612,7 +612,8 @@ environment: PATH: "{{ ansible_env.PATH }}:/usr/local/bin:/usr/bin" PIP_BREAK_SYSTEM_PACKAGES: "1" - # Run PITR + + # Run PITR - name: Stop patroni service on the Replica servers (if running) ansible.builtin.systemd: name: patroni @@ -626,12 +627,16 @@ when: is_master | bool - name: Check that PostgreSQL is stopped + become: true + become_user: postgres ansible.builtin.command: "{{ postgresql_bin_dir }}/pg_ctl status -D {{ postgresql_data_dir }}" register: pg_ctl_status_result changed_when: false failed_when: false - name: Stop PostgreSQL + become: true + become_user: postgres ansible.builtin.command: >- {{ postgresql_bin_dir }}/pg_ctl stop -D {{ postgresql_data_dir }} -m fast -w -t {{ pg_ctl_timeout | default(3600) }} when: pg_ctl_status_result.rc is defined and (pg_ctl_status_result.rc != 3 and pg_ctl_status_result.rc != 4) @@ -719,7 +724,7 @@ delay: 10 when: is_master | bool or (not is_master | bool and 'pgbackrest' in patroni_create_replica_methods) - - name: Wait for PostgreSQL Recovery to complete (WAL apply) + - name: Wait for PostgreSQL recovery to complete (WAL apply) ansible.builtin.command: >- {{ postgresql_bin_dir }}/psql -p {{ postgresql_port }} -U {{ patroni_superuser_username }} -d postgres -tAXc "select pg_is_in_recovery()" From 269be4a2fde45fcb9e28ca66d8d127bab6281d7c Mon Sep 17 00:00:00 2001 From: Vitaliy Kukharik Date: Fri, 1 Nov 2024 15:54:25 +0300 Subject: [PATCH 23/28] Print recovery log --- automation/roles/patroni/tasks/main.yml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/automation/roles/patroni/tasks/main.yml b/automation/roles/patroni/tasks/main.yml index ec1a27348..5b9821a9d 100644 --- a/automation/roles/patroni/tasks/main.yml +++ b/automation/roles/patroni/tasks/main.yml @@ -736,6 +736,18 @@ failed_when: false when: is_master | bool + - name: Get PostgreSQL recovery log + ansible.builtin.command: "grep -A2 'recovery stopping' /tmp/pg_recovery_{{ ansible_date_time.date }}.log" + register: pg_recovery_result + changed_when: false + failed_when: false + when: is_master | bool + + - name: Print PostgreSQL recovery log + ansible.builtin.debug: + msg: '{{ pg_recovery_result.stdout_lines }}' + when: pg_recovery_result.stdout_lines is defined + - name: Check that PostgreSQL is stopped ansible.builtin.command: "{{ postgresql_bin_dir }}/pg_ctl status -D {{ postgresql_data_dir }}" register: pg_ctl_status_result From 9118c58be6e289802e2e908bcb11468523255610 Mon Sep 17 00:00:00 2001 From: Vitaliy Kukharik Date: Fri, 1 Nov 2024 15:58:49 +0300 Subject: [PATCH 24/28] Update main.yml --- automation/roles/patroni/tasks/main.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/automation/roles/patroni/tasks/main.yml b/automation/roles/patroni/tasks/main.yml index 5b9821a9d..6a2f0c1d3 100644 --- a/automation/roles/patroni/tasks/main.yml +++ b/automation/roles/patroni/tasks/main.yml @@ -771,6 +771,7 @@ ansible.builtin.stat: path: "{{ postgresql_data_dir }}/patroni.dynamic.json" register: patroni_dynamic_json + when: not keep_patroni_dynamic_json | bool - name: Remove patroni.dynamic.json file ansible.builtin.file: From 28efdc35571f622aacfcfa9addbf39a643d3730d Mon Sep 17 00:00:00 2001 From: Vitaliy Kukharik Date: Fri, 1 Nov 2024 16:08:36 +0300 Subject: [PATCH 25/28] Update main.yml --- automation/roles/patroni/tasks/main.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/automation/roles/patroni/tasks/main.yml b/automation/roles/patroni/tasks/main.yml index 6a2f0c1d3..96203300d 100644 --- a/automation/roles/patroni/tasks/main.yml +++ b/automation/roles/patroni/tasks/main.yml @@ -777,8 +777,8 @@ ansible.builtin.file: path: "{{ postgresql_data_dir }}/patroni.dynamic.json" state: absent - when: patroni_dynamic_json.stat.exists and - not keep_patroni_dynamic_json|bool + when: patroni_dynamic_json is defined and + patroni_dynamic_json.stat.exists - name: Edit patroni.dynamic.json | disable archive_command (if enabled) yedit: From 3540d145b63b74f92f1d7520cf59b87512217142 Mon Sep 17 00:00:00 2001 From: Vitaliy Kukharik Date: Fri, 1 Nov 2024 16:17:52 +0300 Subject: [PATCH 26/28] update condition --- automation/roles/patroni/tasks/main.yml | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/automation/roles/patroni/tasks/main.yml b/automation/roles/patroni/tasks/main.yml index 96203300d..a69ea8b48 100644 --- a/automation/roles/patroni/tasks/main.yml +++ b/automation/roles/patroni/tasks/main.yml @@ -767,7 +767,7 @@ tags: patroni, point_in_time_recovery - block: # PITR (custom bootstrap) - disable archive_command - - name: Check the patroni.dynamic.json exists + - name: Check if patroni.dynamic.json exists ansible.builtin.stat: path: "{{ postgresql_data_dir }}/patroni.dynamic.json" register: patroni_dynamic_json @@ -777,8 +777,10 @@ ansible.builtin.file: path: "{{ postgresql_data_dir }}/patroni.dynamic.json" state: absent - when: patroni_dynamic_json is defined and - patroni_dynamic_json.stat.exists + when: + - patroni_dynamic_json is defined + - patroni_dynamic_json.stat is defined + - patroni_dynamic_json.stat.exists - name: Edit patroni.dynamic.json | disable archive_command (if enabled) yedit: @@ -786,18 +788,22 @@ key: postgresql.parameters.archive_command value: "cd ." # not doing anything yet with WAL-s content_type: json - when: patroni_dynamic_json.stat.exists and - keep_patroni_dynamic_json|bool and disable_archive_command|bool + when: + - disable_archive_command | bool + - patroni_dynamic_json is defined + - patroni_dynamic_json.stat is defined + - patroni_dynamic_json.stat.exists + - keep_patroni_dynamic_json | bool - name: Edit patroni.yml | disable archive_command (if enabled) yedit: src: /etc/patroni/patroni.yml key: bootstrap.dcs.postgresql.parameters.archive_command value: "cd ." # not doing anything yet with WAL-s - when: disable_archive_command|bool + when: disable_archive_command | bool when: patroni_cluster_bootstrap_method != "initdb" and - (pgbackrest_install|bool or wal_g_install|bool) and - (existing_pgcluster is not defined or not existing_pgcluster|bool) + (pgbackrest_install | bool or wal_g_install | bool) and + (existing_pgcluster is not defined or not existing_pgcluster | bool) become: true become_user: postgres tags: patroni, point_in_time_recovery From 704119ce8b5b5d8987145d2f51f34f04f4477e31 Mon Sep 17 00:00:00 2001 From: Vitaliy Kukharik Date: Fri, 1 Nov 2024 16:20:20 +0300 Subject: [PATCH 27/28] Update main.yml --- automation/roles/patroni/tasks/main.yml | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/automation/roles/patroni/tasks/main.yml b/automation/roles/patroni/tasks/main.yml index a69ea8b48..5b3a310a3 100644 --- a/automation/roles/patroni/tasks/main.yml +++ b/automation/roles/patroni/tasks/main.yml @@ -788,12 +788,7 @@ key: postgresql.parameters.archive_command value: "cd ." # not doing anything yet with WAL-s content_type: json - when: - - disable_archive_command | bool - - patroni_dynamic_json is defined - - patroni_dynamic_json.stat is defined - - patroni_dynamic_json.stat.exists - - keep_patroni_dynamic_json | bool + when: disable_archive_command | bool - name: Edit patroni.yml | disable archive_command (if enabled) yedit: From d4e0063ea155102a455e4ef01f074cc8a7489d12 Mon Sep 17 00:00:00 2001 From: Vitaliy Kukharik Date: Thu, 7 Nov 2024 14:36:30 +0300 Subject: [PATCH 28/28] PostgreSQL recovery details --- automation/roles/patroni/tasks/main.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/automation/roles/patroni/tasks/main.yml b/automation/roles/patroni/tasks/main.yml index 5b3a310a3..709cf5ee4 100644 --- a/automation/roles/patroni/tasks/main.yml +++ b/automation/roles/patroni/tasks/main.yml @@ -736,14 +736,14 @@ failed_when: false when: is_master | bool - - name: Get PostgreSQL recovery log + - name: Check PostgreSQL recovery log ansible.builtin.command: "grep -A2 'recovery stopping' /tmp/pg_recovery_{{ ansible_date_time.date }}.log" register: pg_recovery_result changed_when: false failed_when: false when: is_master | bool - - name: Print PostgreSQL recovery log + - name: PostgreSQL recovery details ansible.builtin.debug: msg: '{{ pg_recovery_result.stdout_lines }}' when: pg_recovery_result.stdout_lines is defined