Merge branch 'master' into cloud
vitabaks committed Mar 28, 2024
2 parents 2e02191 + 1a5eeb5 commit d7f5833
Showing 15 changed files with 154 additions and 46 deletions.
35 changes: 20 additions & 15 deletions inventory
@@ -2,13 +2,12 @@
# The specified IP addresses will be used by the cluster components to listen on.
# Attention! Specify private IP addresses so that the cluster does not listen on public IP addresses.
# For deploying via public IPs, add 'ansible_host=public_ip_address' variable for each node.

# "postgresql_exists='true'" if PostgreSQL is already exists and running
#
# "postgresql_exists=true" if PostgreSQL is already exists and running
# "hostname=" variable is optional (used to change the server name)
# "new_node=true" to add a new server to an existing cluster using the add_pgnode.yml playbook

# In this example, all components will be installed on PostgreSQL nodes.
# You can deploy the haproxy balancers and the etcd or consul cluster on other dedicated servers (recommended).
# patroni_tags="key=value" the Patroni tags in "key=value" format separated by commas.
# balancer_tags="key=value" the Balancer tags for the /replica, /sync, /async endpoints. Add the tag to the 'patroni_tags' variable first.

# if dcs_exists: false and dcs_type: "etcd"
[etcd_cluster] # recommendation: 3, or 5-7 nodes
@@ -21,24 +20,26 @@
#10.128.64.140 consul_node_role=server consul_bootstrap_expect=true consul_datacenter=dc1
#10.128.64.142 consul_node_role=server consul_bootstrap_expect=true consul_datacenter=dc1
#10.128.64.143 consul_node_role=server consul_bootstrap_expect=true consul_datacenter=dc1
#10.128.64.144 consul_node_role=client consul_datacenter=dc1
#10.128.64.144 consul_node_role=client consul_datacenter=dc2
#10.128.64.145 consul_node_role=client consul_datacenter=dc2

# if with_haproxy_load_balancing: true
[balancers]
#10.128.64.140
#10.128.64.142
#10.128.64.143
#10.128.64.144 new_node=true
#10.128.64.140 # balancer_tags="datacenter=dc1"
#10.128.64.142 # balancer_tags="datacenter=dc1"
#10.128.64.143 # balancer_tags="datacenter=dc1"
#10.128.64.144 balancer_tags="datacenter=dc2"
#10.128.64.145 balancer_tags="datacenter=dc2" new_node=true

# PostgreSQL nodes
[master]
#10.128.64.140 hostname=pgnode01 postgresql_exists=false
#10.128.64.140 hostname=pgnode01 postgresql_exists=false # patroni_tags="datacenter=dc1"

[replica]
#10.128.64.142 hostname=pgnode02 postgresql_exists=false
#10.128.64.143 hostname=pgnode03 postgresql_exists=false
#10.128.64.144 hostname=pgnode04 postgresql_exists=false new_node=true
#10.128.64.142 hostname=pgnode02 postgresql_exists=false # patroni_tags="datacenter=dc1"
#10.128.64.143 hostname=pgnode03 postgresql_exists=false # patroni_tags="datacenter=dc1"
#10.128.64.144 hostname=pgnode04 postgresql_exists=false patroni_tags="datacenter=dc2"
#10.128.64.145 hostname=pgnode04 postgresql_exists=false patroni_tags="datacenter=dc2" new_node=true

[postgres_cluster:children]
master
Expand All @@ -59,4 +60,8 @@ ansible_ssh_port='22'
#ansible_user='root'
#ansible_ssh_pass='secretpassword' # the "sshpass" package is required to use "ansible_ssh_pass"
#ansible_ssh_private_key_file=
#ansible_python_interpreter='/usr/bin/python3'
#ansible_python_interpreter='/usr/bin/python3' # required to use python3

[pgbackrest:vars]
#ansible_user='postgres'
#ansible_ssh_pass='secretpassword'
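
For context on the two tag variables documented above: `patroni_tags` writes the tags into each node's patroni.yml, while `balancer_tags` makes the HAProxy health checks require those same tags, so a balancer only routes traffic to replicas that carry its tags (for example, replicas in its own datacenter). A rough way to verify this by hand against the Patroni REST API is sketched below; the REST API port 8008 and the lag/tag values are assumptions for illustration.

```bash
# A replica tagged datacenter=dc2 should return HTTP 200 here; other nodes should return 503.
curl -s -o /dev/null -w '%{http_code}\n' \
  'http://10.128.64.144:8008/replica?lag=100MB&tag_datacenter=dc2'
```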
2 changes: 2 additions & 0 deletions molecule/default/converge.yml
@@ -18,6 +18,8 @@
consul_bootstrap_expect: true # if dcs_type: "consul"
postgresql_version: "16" # to test custom WAL dir
pgbouncer_processes: 2 # Test multiple pgbouncer processes (so_reuseport)
patroni_tags: "datacenter=dc1,key1=value1"
balancer_tags: "datacenter=dc1"
cacheable: true
delegate_to: localhost
run_once: true # noqa run-once
12 changes: 6 additions & 6 deletions roles/confd/templates/haproxy.tmpl.j2
@@ -69,7 +69,7 @@ listen replicas
{% endif %}
maxconn {{ haproxy_maxconn.replica }}
option tcplog
option httpchk OPTIONS /replica?lag={{ patroni_maximum_lag_on_replica }}
option httpchk OPTIONS /replica?lag={{ patroni_maximum_lag_on_replica }}{% if balancer_tags | default('') | length > 0 %}{{ '&' + balancer_tags.split(',') | map('trim') | map('regex_replace', '([^=]+)=(.*)', 'tag_\\1=\\2') | join('&') + '\n' }}{% endif %}
balance roundrobin
http-check expect status 200
default-server inter 3s fastinter 1s fall 3 rise 2 on-marked-down shutdown-sessions
@@ -91,7 +91,7 @@ listen replicas_direct
{% endif %}
maxconn {{ haproxy_maxconn.replica }}
option tcplog
option httpchk OPTIONS /replica?lag={{ patroni_maximum_lag_on_replica }}
option httpchk OPTIONS /replica?lag={{ patroni_maximum_lag_on_replica }}{% if balancer_tags | default('') | length > 0 %}{{ '&' + balancer_tags.split(',') | map('trim') | map('regex_replace', '([^=]+)=(.*)', 'tag_\\1=\\2') | join('&') + '\n' }}{% endif %}
balance roundrobin
http-check expect status 200
default-server inter 3s fastinter 1s fall 3 rise 2 on-marked-down shutdown-sessions
@@ -107,7 +107,7 @@ listen replicas_sync
{% endif %}
maxconn {{ haproxy_maxconn.replica }}
option tcplog
option httpchk OPTIONS /sync
option httpchk OPTIONS /sync{% if balancer_tags | default('') | length > 0 %}{{ '?' + balancer_tags.split(',') | map('trim') | map('regex_replace', '([^=]+)=(.*)', 'tag_\\1=\\2') | join('&') + '\n' }}{% endif %}
balance roundrobin
http-check expect status 200
default-server inter 3s fastinter 1s fall 3 rise 2 on-marked-down shutdown-sessions
@@ -129,7 +129,7 @@ listen replicas_sync_direct
{% endif %}
maxconn {{ haproxy_maxconn.replica }}
option tcplog
option httpchk OPTIONS /sync
option httpchk OPTIONS /sync{% if balancer_tags | default('') | length > 0 %}{{ '?' + balancer_tags.split(',') | map('trim') | map('regex_replace', '([^=]+)=(.*)', 'tag_\\1=\\2') | join('&') + '\n' }}{% endif %}
balance roundrobin
http-check expect status 200
default-server inter 3s fastinter 1s fall 3 rise 2 on-marked-down shutdown-sessions
@@ -145,7 +145,7 @@ listen replicas_async
{% endif %}
maxconn {{ haproxy_maxconn.replica }}
option tcplog
option httpchk OPTIONS /async?lag={{ patroni_maximum_lag_on_replica }}
option httpchk OPTIONS /async?lag={{ patroni_maximum_lag_on_replica }}{% if balancer_tags | default('') | length > 0 %}{{ '&' + balancer_tags.split(',') | map('trim') | map('regex_replace', '([^=]+)=(.*)', 'tag_\\1=\\2') | join('&') + '\n' }}{% endif %}
balance roundrobin
http-check expect status 200
default-server inter 3s fastinter 1s fall 3 rise 2 on-marked-down shutdown-sessions
@@ -167,7 +167,7 @@ listen replicas_async_direct
{% endif %}
maxconn {{ haproxy_maxconn.replica }}
option tcplog
option httpchk OPTIONS /async?lag={{ patroni_maximum_lag_on_replica }}
option httpchk OPTIONS /async?lag={{ patroni_maximum_lag_on_replica }}{% if balancer_tags | default('') | length > 0 %}{{ '&' + balancer_tags.split(',') | map('trim') | map('regex_replace', '([^=]+)=(.*)', 'tag_\\1=\\2') | join('&') + '\n' }}{% endif %}
balance roundrobin
http-check expect status 200
default-server inter 3s fastinter 1s fall 3 rise 2 on-marked-down shutdown-sessions
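
To make the Jinja2 expression in the check lines above easier to follow, here is a sketch of what it renders to, assuming the illustrative values balancer_tags: "datacenter=dc1" and patroni_maximum_lag_on_replica: 100MB (the same rendering applies to the identical change in roles/haproxy/templates/haproxy.cfg.j2 below):

```
# /replica and /async already carry a ?lag= parameter, so the tags are appended with '&':
option httpchk OPTIONS /replica?lag=100MB&tag_datacenter=dc1
# /sync has no query string of its own, so the tag list starts with '?':
option httpchk OPTIONS /sync?tag_datacenter=dc1
```

Patroni's health-check endpoints treat each `tag_<name>=<value>` query parameter as an extra condition, so only replicas whose Patroni tags match return HTTP 200 and remain in the HAProxy backend.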
12 changes: 6 additions & 6 deletions roles/haproxy/templates/haproxy.cfg.j2
@@ -72,7 +72,7 @@ listen replicas
{% endif %}
maxconn {{ haproxy_maxconn.replica }}
option tcplog
option httpchk OPTIONS /replica?lag={{ patroni_maximum_lag_on_replica }}
option httpchk OPTIONS /replica?lag={{ patroni_maximum_lag_on_replica }}{% if balancer_tags | default('') | length > 0 %}{{ '&' + balancer_tags.split(',') | map('trim') | map('regex_replace', '([^=]+)=(.*)', 'tag_\\1=\\2') | join('&') + '\n' }}{% endif %}
balance roundrobin
http-check expect status 200
default-server inter 3s fastinter 1s fall 3 rise 2 on-marked-down shutdown-sessions
@@ -96,7 +96,7 @@ listen replicas_direct
{% endif %}
maxconn {{ haproxy_maxconn.replica }}
option tcplog
option httpchk OPTIONS /replica?lag={{ patroni_maximum_lag_on_replica }}
option httpchk OPTIONS /replica?lag={{ patroni_maximum_lag_on_replica }}{% if balancer_tags | default('') | length > 0 %}{{ '&' + balancer_tags.split(',') | map('trim') | map('regex_replace', '([^=]+)=(.*)', 'tag_\\1=\\2') | join('&') + '\n' }}{% endif %}
balance roundrobin
http-check expect status 200
default-server inter 3s fastinter 1s fall 3 rise 2 on-marked-down shutdown-sessions
@@ -113,7 +113,7 @@ listen replicas_sync
{% endif %}
maxconn {{ haproxy_maxconn.replica }}
option tcplog
option httpchk OPTIONS /sync
option httpchk OPTIONS /sync{% if balancer_tags | default('') | length > 0 %}{{ '?' + balancer_tags.split(',') | map('trim') | map('regex_replace', '([^=]+)=(.*)', 'tag_\\1=\\2') | join('&') + '\n' }}{% endif %}
balance roundrobin
http-check expect status 200
default-server inter 3s fastinter 1s fall 3 rise 2 on-marked-down shutdown-sessions
@@ -137,7 +137,7 @@ listen replicas_sync_direct
{% endif %}
maxconn {{ haproxy_maxconn.replica }}
option tcplog
option httpchk OPTIONS /sync
option httpchk OPTIONS /sync{% if balancer_tags | default('') | length > 0 %}{{ '?' + balancer_tags.split(',') | map('trim') | map('regex_replace', '([^=]+)=(.*)', 'tag_\\1=\\2') | join('&') + '\n' }}{% endif %}
balance roundrobin
http-check expect status 200
default-server inter 3s fastinter 1s fall 3 rise 2 on-marked-down shutdown-sessions
@@ -154,7 +154,7 @@ listen replicas_async
{% endif %}
maxconn {{ haproxy_maxconn.replica }}
option tcplog
option httpchk OPTIONS /async?lag={{ patroni_maximum_lag_on_replica }}
option httpchk OPTIONS /async?lag={{ patroni_maximum_lag_on_replica }}{% if balancer_tags | default('') | length > 0 %}{{ '&' + balancer_tags.split(',') | map('trim') | map('regex_replace', '([^=]+)=(.*)', 'tag_\\1=\\2') | join('&') + '\n' }}{% endif %}
balance roundrobin
http-check expect status 200
default-server inter 3s fastinter 1s fall 3 rise 2 on-marked-down shutdown-sessions
@@ -178,7 +178,7 @@ listen replicas_async_direct
{% endif %}
maxconn {{ haproxy_maxconn.replica }}
option tcplog
option httpchk OPTIONS /async?lag={{ patroni_maximum_lag_on_replica }}
option httpchk OPTIONS /async?lag={{ patroni_maximum_lag_on_replica }}{% if balancer_tags | default('') | length > 0 %}{{ '&' + balancer_tags.split(',') | map('trim') | map('regex_replace', '([^=]+)=(.*)', 'tag_\\1=\\2') | join('&') + '\n' }}{% endif %}
balance roundrobin
http-check expect status 200
default-server inter 3s fastinter 1s fall 3 rise 2 on-marked-down shutdown-sessions
16 changes: 14 additions & 2 deletions roles/patroni/templates/patroni.yml.j2
@@ -204,10 +204,22 @@ watchdog:
safety_margin: 5

tags:
nofailover: false
{% if patroni_tags is defined and patroni_tags | length > 0 %}
{{ patroni_tags | replace(" ", "") | replace("=", ": ") | replace(",", "\n ") }}
{% endif %}
{% set normalized_tags = patroni_tags | default('') | replace(" ", "") %}
{% if 'nosync=' not in normalized_tags %}
nosync: false
{% endif %}
{% if 'noloadbalance=' not in normalized_tags %}
noloadbalance: false
{% endif %}
{% if 'nofailover=' not in normalized_tags %}
nofailover: false
{% endif %}
{% if 'clonefrom=' not in normalized_tags %}
clonefrom: false
nosync: false
{% endif %}

# specify a node to replicate from (cascading replication)
# replicatefrom: (node name)
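
As a sketch of the rendered result: with the molecule test value patroni_tags: "datacenter=dc1,key1=value1" from converge.yml above, the tags section of the generated patroni.yml would come out roughly as follows; the default `false` entries are emitted only for the standard tags not already listed in patroni_tags, and the exact ordering may differ:

```yaml
tags:
  datacenter: dc1
  key1: value1
  nosync: false
  noloadbalance: false
  nofailover: false
  clonefrom: false
```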
13 changes: 8 additions & 5 deletions roles/upgrade/README.md
@@ -46,6 +46,8 @@ On average, the PgBouncer pause duration is approximately 30 seconds. However, f

This playbook performs a rollback of a PostgreSQL upgrade.

Note: In some failure scenarios, the pg_upgrade.yml playbook may initiate a rollback automatically. If it does not, you can run the pg_upgrade_rollback.yml playbook manually to revert the changes.

```bash
ansible-playbook pg_upgrade_rollback.yml
```
@@ -182,7 +184,8 @@ Please see the variable file vars/[upgrade.yml](../../vars/upgrade.yml)
- Print the result of the pg_upgrade check

#### 4. PRE-UPGRADE: Prepare the Patroni configuration
- Edit patroni.yml
- Backup the patroni.yml configuration file
- Edit the patroni.yml configuration file
- **Update parameters**: `data_dir`, `bin_dir`, `config_dir`
- **Check if the 'standby_cluster' parameter is specified**
- Remove parameters: `standby_cluster` (if exists)
@@ -226,7 +229,7 @@ Please see the variable file vars/[upgrade.yml](../../vars/upgrade.yml)
- Notes: max wait time: 2 minutes
- Stop, if replication lag is high
- Perform rollback
- Print error message: "There's a replication lag in the PostgreSQL Cluster. Please try again later"
- Print error message: "There's a replication lag in the PostgreSQL Cluster. Please try again later"
- **Perform PAUSE on all pgbouncers servers**
- Notes: if 'pgbouncer_install' is 'true' and 'pgbouncer_pool_pause' is 'true'
- Notes: pgbouncer pause script (details in [pgbouncer_pause.yml](tasks/pgbouncer_pause.yml)) performs the following actions:
@@ -236,7 +239,7 @@ Please see the variable file vars/[upgrade.yml](../../vars/upgrade.yml)
- If active queries do not complete within 30 seconds (`pgbouncer_pool_pause_terminate_after` variable), the script terminates slow active queries (longer than `pg_slow_active_query_treshold_to_terminate`).
- If after that it is still not possible to pause the pgbouncer servers within 60 seconds (`pgbouncer_pool_pause_stop_after` variable) from the start of the script, the script exits with an error.
- Perform rollback
- Print error message: "PgBouncer pools could not be paused, please try again later."
- Print error message: "PgBouncer pools could not be paused, please try again later."
- **Stop PostgreSQL** on the Leader and Replicas
- Check if old PostgreSQL is stopped
- Check if new PostgreSQL is stopped
@@ -248,9 +251,9 @@ Please see the variable file vars/[upgrade.yml](../../vars/upgrade.yml)
- "'Latest checkpoint location' is the same on the leader and its standbys"
- if the 'Latest checkpoint location' values don't match
- Perform rollback
- Stop with error message:
- "Latest checkpoint location' doesn't match on leader and its standbys. Please try again later"
- Print error message: "Latest checkpoint location' doesn't match on leader and its standbys. Please try again later"
- **Upgrade the PostgreSQL on the Primary** (using pg_upgrade --link)
- Perform rollback, if the upgrade failed
- Print the result of the pg_upgrade
- **Make sure that the new data directory is empty on the Replica**
- **Upgrade the PostgreSQL on the Replica** (using rsync --hard-links)
5 changes: 5 additions & 0 deletions roles/upgrade/tasks/extensions.yml
@@ -5,7 +5,11 @@
{{ pg_new_bindir }}/psql -p {{ postgresql_port }} -U {{ patroni_superuser_username }} -d postgres -tAXc
"select datname from pg_catalog.pg_database where datname <> 'template0'"
register: databases_list
until: databases_list is success
delay: 5
retries: 3
changed_when: false
ignore_errors: true # show the error and continue the playbook execution
when:
- inventory_hostname in groups['primary']

@@ -15,6 +19,7 @@
loop_control:
loop_var: pg_target_dbname
when:
- databases_list is success
- databases_list.stdout_lines is defined
- databases_list.stdout_lines | length > 0

9 changes: 9 additions & 0 deletions roles/upgrade/tasks/post_checks.yml
@@ -31,6 +31,11 @@
{{ pg_new_bindir }}/psql -p {{ postgresql_port }} -U {{ patroni_superuser_username }} -d postgres -tAXc
"drop table IF EXISTS test_replication;
create table test_replication as select generate_series(1, 10000)"
register: create_table_result
until: create_table_result is success
delay: 5
retries: 3
ignore_errors: true # show the error and continue the playbook execution
when:
- inventory_hostname in groups['primary']

@@ -46,13 +51,15 @@
failed_when: false
when:
- inventory_hostname in groups['secondary']
- create_table_result is success

- name: Drop a table "test_replication"
ansible.builtin.command: >-
{{ pg_new_bindir }}/psql -p {{ postgresql_port }} -U {{ patroni_superuser_username }} -d postgres -tAXc
"drop table IF EXISTS test_replication"
when:
- inventory_hostname in groups['primary']
- create_table_result is success

- name: Print the result of checking the number of records
ansible.builtin.debug:
Expand All @@ -61,6 +68,7 @@
- "The number of records in the test_replication table the same as the Primary ({{ count_test.stdout }} rows)"
when:
- inventory_hostname in groups['secondary']
- count_test.stdout is defined
- count_test.stdout | int == 10000

# Error, if the number of records in the "test_replication" table does not match the Primary.
@@ -74,6 +82,7 @@
ignore_errors: true # show the error and continue the playbook execution
when:
- inventory_hostname in groups['secondary']
- count_test.stdout is defined
- count_test.stdout | int != 10000

...