From ec23be60867295f5968e0fcb81398fa7b244837d Mon Sep 17 00:00:00 2001
From: Dougal Seeley
Date: Thu, 15 Apr 2021 09:32:34 +0100
Subject: [PATCH] Fix for non-nitro AWS ephemeral disks. Refactor for multiple clouds. (#87)

+ Update blockdevmap.py to fix problem with non-nitro AWS ephemeral disks.
+ Refactor for multiple clouds
+ Remove inline aws and gcp code from generic yml files. E.g.: `clean_vms.yml` contains blocks for `when: cluster_vars.type == "aws"` and `when: cluster_vars.type == "gcp"`. Replace with cloud-specific files, e.g.: `clean/tasks/aws.yml` and `clean/tasks/gcp.yml`.
+ Allows others to more easily add new cloud technologies (e.g. Azure or VMware) without changing existing cloud files.
+ These changes don't introduce any functional differences, just a reorganisation.
+ Fix: test for `clean is defined`
+ Reuse get_cluster_hosts_state_{{buildenv}}.yml code to build dynamic inventory.
+ Add disk info to cluster_hosts_state to reduce code duplication and save on cloud calls during rollback with _scheme_rmvm_keepdisk_rollback
+ Add automatic checking of correct device mapping to disks_auto_aws_gcp.yml
+ Add dynamic_inventory as a dependency of redeployment
---
 EXAMPLE/README.md | 24 +--
 .../aws/testid/cluster_vars__clusterid.yml | 1 -
 .../testid/eu-west-1/cluster_vars__region.yml | 2 +-
 .../sandbox/cluster_vars__buildenv.yml | 7 +
 EXAMPLE/cluster_defs/cluster_vars.yml | 5 +-
 .../cluster_defs/gcp/cluster_vars__cloud.yml | 2 +-
 .../test_aws_euw1/cluster_vars.yml | 2 +-
 .../test_gcp_euw1/cluster_vars.yml | 2 +-
 EXAMPLE/clusterverse_label_upgrade_v1-v2.yml | 6 +-
 README.md | 21 +--
 _dependencies/filter_plugins/custom.py | 22 ++-
 _dependencies/library/blockdevmap.py | 178 ++++++++++++++++--
 _dependencies/library/blockdevmap_LICENSE | 2 +-
 _dependencies/library/blockdevmap_README.md | 26 +--
 _dependencies/library/deprecate_str.py | 1 +
 _dependencies/tasks/main.yml | 4 +
 clean/tasks/aws.yml | 36 ++++
 clean/tasks/clean_networking.yml | 31 ---
 clean/tasks/clean_vms.yml | 65 -------
 clean/tasks/{clean_dns.yml => dns.yml} | 6 +-
 clean/tasks/gcp.yml | 62 ++++++
 clean/tasks/main.yml | 13 +-
 .../tasks/get_cluster_hosts_state.yml | 57 ------
 .../tasks/get_cluster_hosts_state_aws.yml | 17 ++
 .../tasks/get_cluster_hosts_state_gcp.yml | 26 +++
 .../tasks/get_cluster_hosts_target.yml | 156 ++++-----------
 .../tasks/get_cluster_hosts_target_aws.yml | 78 ++++++++
 .../tasks/get_cluster_hosts_target_gcp.yml | 14 ++
 cluster_hosts/tasks/main.yml | 8 +-
 config/tasks/disks_auto_aws_gcp.yml | 77 +++++---
 create/tasks/aws.yml | 2 +-
 create/tasks/gcp.yml | 18 +-
 dynamic_inventory/tasks/aws.yml | 23 ---
 dynamic_inventory/tasks/gcp.yml | 33 ----
 dynamic_inventory/tasks/main.yml | 33 ++--
 jenkinsfiles/Jenkinsfile_testsuite | 2 +-
 readiness/tasks/main.yml | 2 +-
 readiness/tasks/remove_maintenance_mode.yml | 58 ------
 .../tasks/remove_maintenance_mode_aws.yml | 13 ++
 .../tasks/remove_maintenance_mode_gcp.yml | 29 +++
 .../__common/tasks/powerchange_vms_aws.yml | 27 +++
 .../__common/tasks/powerchange_vms_azure.yml | 32 ++++
 .../tasks/powerchange_vms_esxifree.yml | 28 +++
 .../__common/tasks/powerchange_vms_gcp.yml | 31 +++
 redeploy/__common/tasks/poweroff_vms.yml | 57 ------
 redeploy/__common/tasks/poweron_vms.yml | 46 -----
 .../tasks/set_lifecycle_state_label.yml | 31 ---
 .../tasks/set_lifecycle_state_label_aws.yml | 14 ++
 .../tasks/set_lifecycle_state_label_gcp.yml | 16 ++
 .../tasks/main.yml | 8 +-
 .../tasks/redeploy.yml | 7 +-
 .../tasks/rescue.yml | 12 +-
 .../tasks/main.yml | 23 ++-
.../tasks/redeploy_by_hosttype_by_host.yml | 5 +- .../tasks/rescue.yml | 14 +- ..._diskinfo_to_cluster_hosts_target__aws.yml | 25 +-- ..._diskinfo_to_cluster_hosts_target__gcp.yml | 33 +--- .../tasks/by_hosttype_by_host.yml | 23 ++- .../tasks/main.yml | 10 +- .../tasks/by_hosttype_by_host.yml | 4 +- redeploy/meta/main.yml | 2 +- 61 files changed, 863 insertions(+), 749 deletions(-) create mode 100644 clean/tasks/aws.yml delete mode 100644 clean/tasks/clean_networking.yml delete mode 100644 clean/tasks/clean_vms.yml rename clean/tasks/{clean_dns.yml => dns.yml} (97%) create mode 100644 clean/tasks/gcp.yml delete mode 100644 cluster_hosts/tasks/get_cluster_hosts_state.yml create mode 100644 cluster_hosts/tasks/get_cluster_hosts_state_aws.yml create mode 100644 cluster_hosts/tasks/get_cluster_hosts_state_gcp.yml create mode 100644 cluster_hosts/tasks/get_cluster_hosts_target_aws.yml create mode 100644 cluster_hosts/tasks/get_cluster_hosts_target_gcp.yml delete mode 100644 dynamic_inventory/tasks/aws.yml delete mode 100644 dynamic_inventory/tasks/gcp.yml delete mode 100644 readiness/tasks/remove_maintenance_mode.yml create mode 100644 readiness/tasks/remove_maintenance_mode_aws.yml create mode 100644 readiness/tasks/remove_maintenance_mode_gcp.yml create mode 100644 redeploy/__common/tasks/powerchange_vms_aws.yml create mode 100644 redeploy/__common/tasks/powerchange_vms_azure.yml create mode 100644 redeploy/__common/tasks/powerchange_vms_esxifree.yml create mode 100644 redeploy/__common/tasks/powerchange_vms_gcp.yml delete mode 100644 redeploy/__common/tasks/poweroff_vms.yml delete mode 100644 redeploy/__common/tasks/poweron_vms.yml delete mode 100644 redeploy/__common/tasks/set_lifecycle_state_label.yml create mode 100644 redeploy/__common/tasks/set_lifecycle_state_label_aws.yml create mode 100644 redeploy/__common/tasks/set_lifecycle_state_label_gcp.yml diff --git a/EXAMPLE/README.md b/EXAMPLE/README.md index 8f68fcb7..9e5a5527 100644 --- a/EXAMPLE/README.md +++ b/EXAMPLE/README.md @@ -17,17 +17,17 @@ The `cluster.yml` sub-role immutably deploys a cluster from the config defined a ### AWS: ``` -ansible-playbook -u ubuntu --private-key=/home//.ssh/ cluster.yml -e buildenv=sandbox -e cloud_type=aws -e region=eu-west-1 -e clusterid=test --vault-id=sandbox@.vaultpass-client.py -ansible-playbook -u ubuntu --private-key=/home//.ssh/ cluster.yml -e buildenv=sandbox -e cloud_type=aws -e region=eu-west-1 -e clusterid=test --vault-id=sandbox@.vaultpass-client.py --tags=clusterverse_clean -e clean=_all_ -ansible-playbook -u ubuntu --private-key=/home//.ssh/ cluster.yml -e buildenv=sandbox -e clusterid=test_aws_euw1 --vault-id=sandbox@.vaultpass-client.py -ansible-playbook -u ubuntu --private-key=/home//.ssh/ cluster.yml -e buildenv=sandbox -e clusterid=test_aws_euw1 --vault-id=sandbox@.vaultpass-client.py --tags=clusterverse_clean -e clean=_all_ +ansible-playbook cluster.yml -e buildenv=sandbox -e clusterid=testid -e cloud_type=aws -e region=eu-west-1 --vault-id=sandbox@.vaultpass-client.py +ansible-playbook cluster.yml -e buildenv=sandbox -e clusterid=testid -e cloud_type=aws -e region=eu-west-1 --vault-id=sandbox@.vaultpass-client.py --tags=clusterverse_clean -e clean=_all_ +ansible-playbook cluster.yml -e buildenv=sandbox -e clusterid=test_aws_euw1 --vault-id=sandbox@.vaultpass-client.py +ansible-playbook cluster.yml -e buildenv=sandbox -e clusterid=test_aws_euw1 --vault-id=sandbox@.vaultpass-client.py --tags=clusterverse_clean -e clean=_all_ ``` ### GCP: ``` -ansible-playbook -u 
--private-key=/home//.ssh/ cluster.yml -e buildenv=sandbox -e clusterid=test -e cloud_type=gcp -e region=europe-west1 --vault-id=sandbox@.vaultpass-client.py -ansible-playbook -u --private-key=/home//.ssh/ cluster.yml -e buildenv=sandbox -e clusterid=test -e cloud_type=gcp -e region=europe-west1 --vault-id=sandbox@.vaultpass-client.py --tags=clusterverse_clean -e clean=_all_ -ansible-playbook -u --private-key=/home//.ssh/ cluster.yml -e buildenv=sandbox -e clusterid=test_gcp_euw1 --vault-id=sandbox@.vaultpass-client.py -ansible-playbook -u --private-key=/home//.ssh/ cluster.yml -e buildenv=sandbox -e clusterid=test_gcp_euw1 --vault-id=sandbox@.vaultpass-client.py --tags=clusterverse_clean -e clean=_all_ +ansible-playbook cluster.yml -e buildenv=sandbox -e clusterid=testid -e cloud_type=gcp -e region=europe-west1 --vault-id=sandbox@.vaultpass-client.py +ansible-playbook cluster.yml -e buildenv=sandbox -e clusterid=testid -e cloud_type=gcp -e region=europe-west1 --vault-id=sandbox@.vaultpass-client.py --tags=clusterverse_clean -e clean=_all_ +ansible-playbook cluster.yml -e buildenv=sandbox -e clusterid=test_gcp_euw1 --vault-id=sandbox@.vaultpass-client.py +ansible-playbook cluster.yml -e buildenv=sandbox -e clusterid=test_gcp_euw1 --vault-id=sandbox@.vaultpass-client.py --tags=clusterverse_clean -e clean=_all_ ``` ### Mandatory command-line variables: @@ -62,13 +62,13 @@ The `redeploy.yml` sub-role will completely redeploy the cluster; this is useful ### AWS: ``` -ansible-playbook -u ubuntu --private-key=/home//.ssh/ redeploy.yml -e buildenv=sandbox -e clusterid=test_aws_euw1 --vault-id=sandbox@.vaultpass-client.py -e canary=none -ansible-playbook -u ubuntu --private-key=/home//.ssh/ redeploy.yml -e buildenv=sandbox -e cloud_type=aws -e region=eu-west-1 -e clusterid=test --vault-id=sandbox@.vaultpass-client.py -e canary=none +ansible-playbook redeploy.yml -e buildenv=sandbox -e cloud_type=aws -e region=eu-west-1 -e clusterid=test --vault-id=sandbox@.vaultpass-client.py -e canary=none +ansible-playbook redeploy.yml -e buildenv=sandbox -e clusterid=test_aws_euw1 --vault-id=sandbox@.vaultpass-client.py -e canary=none ``` ### GCP: ``` -ansible-playbook -u --private-key=/home//.ssh/ redeploy.yml -e buildenv=sandbox -e clusterid=test_aws_euw1 --vault-id=sandbox@.vaultpass-client.py -e canary=none -ansible-playbook -u --private-key=/home//.ssh/ redeploy.yml -e buildenv=sandbox -e clusterid=test -e cloud_type=gcp -e region=europe-west1 --vault-id=sandbox@.vaultpass-client.py -e canary=none +ansible-playbook redeploy.yml -e buildenv=sandbox -e clusterid=test -e cloud_type=gcp -e region=europe-west1 --vault-id=sandbox@.vaultpass-client.py -e canary=none +ansible-playbook redeploy.yml -e buildenv=sandbox -e clusterid=test_aws_euw1 --vault-id=sandbox@.vaultpass-client.py -e canary=none ``` ### Mandatory command-line variables: diff --git a/EXAMPLE/cluster_defs/aws/testid/cluster_vars__clusterid.yml b/EXAMPLE/cluster_defs/aws/testid/cluster_vars__clusterid.yml index 42b49950..4156457b 100644 --- a/EXAMPLE/cluster_defs/aws/testid/cluster_vars__clusterid.yml +++ b/EXAMPLE/cluster_defs/aws/testid/cluster_vars__clusterid.yml @@ -18,7 +18,6 @@ beats_config: cluster_vars: dns_nameserver_zone: &dns_nameserver_zone "" # The zone that dns_server will operate on. gcloud dns needs a trailing '.'. 
Leave blank if no external DNS (use IPs only) dns_user_domain: "{%- if _dns_nameserver_zone -%}{{cloud_type}}-{{region}}.{{app_class}}.{{buildenv}}.{{_dns_nameserver_zone}}{%- endif -%}" # A user-defined _domain_ part of the FDQN, (if more prefixes are required before the dns_nameserver_zone) - dns_server: "" # Specify DNS server. nsupdate, route53 or clouddns. If empty string is specified, no DNS will be added. instance_profile_name: "" custom_tagslabels: inv_resident_id: "myresident" diff --git a/EXAMPLE/cluster_defs/aws/testid/eu-west-1/cluster_vars__region.yml b/EXAMPLE/cluster_defs/aws/testid/eu-west-1/cluster_vars__region.yml index 69e1389e..21773b54 100644 --- a/EXAMPLE/cluster_defs/aws/testid/eu-west-1/cluster_vars__region.yml +++ b/EXAMPLE/cluster_defs/aws/testid/eu-west-1/cluster_vars__region.yml @@ -1,5 +1,5 @@ --- cluster_vars: - image: "ami-04ffbabc7935ec0e9" # eu-west-1, ubuntu, 20.04, amd64, hvm-ssd, 20210108. Ubuntu images can be located at https://cloud-images.ubuntu.com/locator/ + image: "ami-0dd0f5f97a21a8fe9" # eu-west-1, ubuntu, 20.04, amd64, hvm-ssd, 20210315. Ubuntu images can be located at https://cloud-images.ubuntu.com/locator/ # image: "ami-0b850cf02cc00fdc8" # eu-west-1, CentOS7 diff --git a/EXAMPLE/cluster_defs/aws/testid/eu-west-1/sandbox/cluster_vars__buildenv.yml b/EXAMPLE/cluster_defs/aws/testid/eu-west-1/sandbox/cluster_vars__buildenv.yml index af0cd5d3..b0b529e6 100644 --- a/EXAMPLE/cluster_defs/aws/testid/eu-west-1/sandbox/cluster_vars__buildenv.yml +++ b/EXAMPLE/cluster_defs/aws/testid/eu-west-1/sandbox/cluster_vars__buildenv.yml @@ -50,6 +50,13 @@ cluster_vars: version: "{{sysdisks_version | default('')}}" vms_by_az: { a: 1, b: 1, c: 0 } + hostnvme-notnitro: + auto_volumes: + - { device_name: "/dev/sdb", mountpoint: "/media/mysvc", fstype: "ext4", volume_type: "ephemeral", ephemeral: ephemeral0 } + flavor: i3.large + version: "{{sys_version | default('')}}" + vms_by_az: { a: 1, b: 1, c: 0 } + hostnvme-multi: auto_volumes: - { device_name: "/dev/sdb", mountpoint: "/media/mysvc", fstype: "ext4", volume_type: "ephemeral", ephemeral: ephemeral0 } diff --git a/EXAMPLE/cluster_defs/cluster_vars.yml b/EXAMPLE/cluster_defs/cluster_vars.yml index 072a5d84..99ed20ec 100644 --- a/EXAMPLE/cluster_defs/cluster_vars.yml +++ b/EXAMPLE/cluster_defs/cluster_vars.yml @@ -8,9 +8,10 @@ redeploy_schemes_supported: ['_scheme_addallnew_rmdisk_rollback', '_scheme_addne #redeploy_scheme: _scheme_rmvm_keepdisk_rollback skip_dynamic_inventory_sshwait: true +test_touch_disks: true -app_name: "{{lookup('pipe', 'whoami')}}-test" # The name of the application cluster (e.g. 'couchbase', 'nginx'); becomes part of cluster_name. Provided is a default to ensure no accidental overwriting. -app_class: "test" # The class of application (e.g. 'database', 'webserver'); becomes part of the fqdn +app_name: "{{lookup('pipe', 'whoami') | lower}}-test" # The name of the application cluster (e.g. 'couchbase', 'nginx'); becomes part of cluster_name. Provided is a default to ensure no accidental overwriting. +app_class: "test" # The class of application (e.g. 
'database', 'webserver'); becomes part of the fqdn beats_config: filebeat: diff --git a/EXAMPLE/cluster_defs/gcp/cluster_vars__cloud.yml b/EXAMPLE/cluster_defs/gcp/cluster_vars__cloud.yml index 265cde54..deef36a8 100644 --- a/EXAMPLE/cluster_defs/gcp/cluster_vars__cloud.yml +++ b/EXAMPLE/cluster_defs/gcp/cluster_vars__cloud.yml @@ -1,7 +1,7 @@ --- cluster_vars: - image: "projects/ubuntu-os-cloud/global/images/ubuntu-2004-focal-v20210112" # Ubuntu images can be located at https://cloud-images.ubuntu.com/locator/ + image: "projects/ubuntu-os-cloud/global/images/ubuntu-2004-focal-v20210315" # Ubuntu images can be located at https://cloud-images.ubuntu.com/locator/ # image: "projects/ubuntu-os-cloud/global/images/centos-7-v20201216 dns_cloud_internal_domain: "c.{{ (_gcp_service_account_rawtext | string | from_json).project_id }}.internal" # The cloud-internal zone as defined by the cloud provider (e.g. GCP, AWS) dns_server: "clouddns" # Specify DNS server. nsupdate, route53 or clouddns. If empty string is specified, no DNS will be added. diff --git a/EXAMPLE/cluster_defs/test_aws_euw1/cluster_vars.yml b/EXAMPLE/cluster_defs/test_aws_euw1/cluster_vars.yml index f3e071a5..0a1cf7a2 100644 --- a/EXAMPLE/cluster_defs/test_aws_euw1/cluster_vars.yml +++ b/EXAMPLE/cluster_defs/test_aws_euw1/cluster_vars.yml @@ -50,7 +50,7 @@ cluster_name: "{{app_name}}-{{buildenv}}" # Identifies the cluster within cluster_vars: type: &cloud_type "aws" - image: "ami-04ffbabc7935ec0e9" # eu-west-1, ubuntu, 20.04, amd64, hvm-ssd, 20210108. Ubuntu images can be located at https://cloud-images.ubuntu.com/locator/ + image: "ami-0dd0f5f97a21a8fe9" # eu-west-1, ubuntu, 20.04, amd64, hvm-ssd, 20210315. Ubuntu images can be located at https://cloud-images.ubuntu.com/locator/ # image: "ami-0b850cf02cc00fdc8" # eu-west-1, CentOS7 region: ®ion "eu-west-1" dns_cloud_internal_domain: "{{_region}}.compute.internal" # The cloud-internal zone as defined by the cloud provider (e.g. GCP, AWS) diff --git a/EXAMPLE/cluster_defs/test_gcp_euw1/cluster_vars.yml b/EXAMPLE/cluster_defs/test_gcp_euw1/cluster_vars.yml index c7a8bbfc..a843c541 100644 --- a/EXAMPLE/cluster_defs/test_gcp_euw1/cluster_vars.yml +++ b/EXAMPLE/cluster_defs/test_gcp_euw1/cluster_vars.yml @@ -50,7 +50,7 @@ cluster_name: "{{app_name}}-{{buildenv}}" # Identifies the cluster within cluster_vars: type: &cloud_type "gcp" - image: "projects/ubuntu-os-cloud/global/images/ubuntu-2004-focal-v20210112" # Ubuntu images can be located at https://cloud-images.ubuntu.com/locator/ + image: "projects/ubuntu-os-cloud/global/images/ubuntu-2004-focal-v20210315" # Ubuntu images can be located at https://cloud-images.ubuntu.com/locator/ # image: "projects/ubuntu-os-cloud/global/images/centos-7-v20201216 region: ®ion "europe-west1" dns_cloud_internal_domain: "c.{{ (_gcp_service_account_rawtext | string | from_json).project_id }}.internal" # The cloud-internal zone as defined by the cloud provider (e.g. 
GCP, AWS) diff --git a/EXAMPLE/clusterverse_label_upgrade_v1-v2.yml b/EXAMPLE/clusterverse_label_upgrade_v1-v2.yml index 18e2f32b..4c0860b9 100644 --- a/EXAMPLE/clusterverse_label_upgrade_v1-v2.yml +++ b/EXAMPLE/clusterverse_label_upgrade_v1-v2.yml @@ -5,12 +5,12 @@ connection: local gather_facts: true tasks: - - import_role: + - include_role: name: 'clusterverse/_dependencies' - - import_role: + - include_role: name: 'clusterverse/cluster_hosts' - tasks_from: get_cluster_hosts_state.yml + tasks_from: "get_cluster_hosts_state_{{cluster_vars.type}}.yml" - block: - name: clusterverse_label_upgrade_v1-v2 | Add lifecycle_state and cluster_suffix label to AWS EC2 VM diff --git a/README.md b/README.md index 167c6726..99f81a75 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ A full-lifecycle, immutable cloud infrastructure cluster management **role**, us + **Multi-cloud:** clusterverse can manage cluster lifecycle in AWS and GCP + **Deploy:** You define your infrastructure as code (in Ansible yaml), and clusterverse will deploy it + **Scale-up:** If you change the cluster definitions and rerun the deploy, new nodes will be added. -+ **Redeploy (e.g. up-version):** If you need to up-version, the `redeploy.yml` playbook will replace each node in turn, (with optional callbacks), and rollback if any failures occur. ++ **Redeploy (e.g. up-version):** If you need to up-version, or replace the underlying OS, (i.e. to achieve fully immutable, zero-patching redeploys), the `redeploy.yml` playbook will replace each node in the cluster (via various redeploy schemes), and rollback if any failures occur. **clusterverse** is designed to manage base-vm infrastructure that underpins cluster-based infrastructure, for example, Couchbase, Kafka, Elasticsearch, or Cassandra. @@ -22,19 +22,19 @@ To active the pipenv: ### AWS + AWS account with IAM rights to create EC2 VMs and security groups in the chosen VPCs/subnets. Place the credentials in: - + `cluster_vars//aws_access_key:` - + `cluster_vars//aws_secret_key:` + + `cluster_vars[buildenv].aws_access_key:` + + `cluster_vars[buildenv].aws_secret_key:` + Preexisting VPCs: - + `cluster_vars//vpc_name: my-vpc-{{buildenv}}` + + `cluster_vars[buildenv].vpc_name: my-vpc-{{buildenv}}` + Preexisting subnets. This is a prefix - the cloud availability zone will be appended to the end (e.g. `a`, `b`, `c`). - + `cluster_vars//vpc_subnet_name_prefix: my-subnet-{{region}}` + + `cluster_vars[buildenv].vpc_subnet_name_prefix: my-subnet-{{region}}` + Preexisting keys (in AWS IAM): - + `cluster_vars//key_name: my_key__id_rsa` + + `cluster_vars[buildenv].key_name: my_key__id_rsa` ### GCP + Create a gcloud account. + Create a service account in `IAM & Admin` / `Service Accounts`. Download the json file locally. -+ Store the contents within the `cluster_vars/gcp_service_account_rawtext` variable. ++ Store the contents within the `cluster_vars[buildenv].gcp_service_account_rawtext` variable. + During execution, the json file will be copied locally because the Ansible GCP modules often require the file as input. + Google Cloud SDK needs to be installed to run gcloud command-line (e.g. to disable delete protection) - this is handled by `pipenv install` @@ -183,14 +183,13 @@ The role is designed to run in two modes: + The `redeploy.yml` sub-role will completely redeploy the cluster; this is useful for example to upgrade the underlying operating system version. + It supports `canary` deploys. 
The `canary` extra variable must be defined on the command line set to one of: `start`, `finish`, `none` or `tidy`. + It contains callback hooks: - + `mainclusteryml`: This is the name of the deployment playbook. It is called to rollback a failed deployment. It should be set to the value of the primary _deploy_ playbook yml (e.g. `cluster.yml`) + + `mainclusteryml`: This is the name of the deployment playbook. It is called to deploy nodes for the new cluster, or to rollback a failed deployment. It should be set to the value of the primary _deploy_ playbook yml (e.g. `cluster.yml`) + `predeleterole`: This is the name of a role that should be called prior to deleting VMs; it is used for example to eject nodes from a Couchbase cluster. It takes a list of `hosts_to_remove` VMs. + It supports pluggable redeployment schemes. The following are provided: + **_scheme_rmvm_rmdisk_only** + This is a very basic rolling redeployment of the cluster. - + Canary **is not** supported. + _Supports redploying to bigger, but not smaller clusters_ - + **It assumes a resilient deployment (it can tolerate one node being deleted from the cluster). There is no rollback in case of failure.** + + **It assumes a resilient deployment (it can tolerate one node being deleted from the cluster). There is _no rollback_ in case of failure.** + For each node in the cluster: + Run `predeleterole` + Delete/ terminate the node (note, this is _irreversible_). @@ -217,7 +216,7 @@ The role is designed to run in two modes: + To delete the old VMs, either set '-e canary_tidy_on_success=true', or call redeploy.yml with '-e canary=tidy' + **_scheme_rmvm_keepdisk_rollback** + Redeploys the nodes one by one, and moves the secondary (non-root) disks from the old to the new (note, only non-ephemeral disks can be moved). - + _Cluster topology must remain identical. More disks may be added, but none may change or be removed._ + + _Cluster node topology must remain identical. More disks may be added, but none may change or be removed._ + **It assumes a resilient deployment (it can tolerate one node being removed from the cluster).** + For each node in the cluster: + Run `predeleterole` diff --git a/_dependencies/filter_plugins/custom.py b/_dependencies/filter_plugins/custom.py index 0796adaa..cf7e996d 100644 --- a/_dependencies/filter_plugins/custom.py +++ b/_dependencies/filter_plugins/custom.py @@ -1,6 +1,8 @@ #!/usr/bin/env python from ansible.utils.display import Display +from ansible import constants as C +from ansible.module_utils._text import to_native, to_text display = Display() # display.v(u"json_loads_loose - input type: %s" % type(inStr)) @@ -35,24 +37,28 @@ def iplookup(fqdn): return fqdn else: import dns.resolver - return str(dns.resolver.query(fqdn, 'A')[0]) + return to_text(dns.resolver.query(fqdn, 'A')[0]) # Returns a json object from a loosely defined string (e.g. 
encoded using single quotes instead of double), or an object containing "AnsibleUnsafeText" def json_loads_loose(inStr): - import re, json + import re, json, sys - display.vvv(u"json_loads_loose - input type: %s" % type(inStr)) + display.vv(u"json_loads_loose - input type: %s; value %s" % (type(inStr), inStr)) if type(inStr) is dict or type(inStr) is list: - json_object = json.loads((str(json.dumps(inStr))).encode('utf-8')) + json_object = json.loads((to_text(json.dumps(inStr))).encode('utf-8')) else: try: json_object = json.loads(inStr) - except (ValueError, AttributeError) as e: + except (ValueError, AttributeError, TypeError) as e: try: - json_object = json.loads(str(re.sub(r'\'(.*?)\'([,:}])', r'"\1"\2', inStr).replace(': True', ': "True"').replace(': False', ': "False"')).encode('utf-8')) - except (ValueError, AttributeError) as e: - display.v(u"json_loads_loose - WARNING: could not parse attribute string as json: %s" % inStr) + json_object = json.loads(to_text(re.sub(r'\'(.*?)\'([,:}])', r'"\1"\2', inStr).replace(': True', ': "True"').replace(': False', ': "False"')).encode('utf-8')) + except (ValueError, AttributeError, TypeError) as e: + display.warning(u"json_loads_loose - WARNING: could not parse attribute string (%s) as json: %s" % (to_native(inStr), to_native(e))) return inStr + except: + e = sys.exc_info()[0] + display.warning(u"json_loads_loose - WARNING: could not parse attribute string (%s) as json: %s" % (to_native(inStr), to_native(e))) + return inStr return json_object diff --git a/_dependencies/library/blockdevmap.py b/_dependencies/library/blockdevmap.py index ac6eba3e..3c10351e 100644 --- a/_dependencies/library/blockdevmap.py +++ b/_dependencies/library/blockdevmap.py @@ -1,4 +1,5 @@ # Copyright 2020 Dougal Seeley +# BSD 3-Clause License # https://github.com/dseeley/blockdevmap # Copyright 2017 Amazon.com, Inc. and its affiliates. All Rights Reserved. @@ -17,22 +18,16 @@ version_added: 1.0.0 short_description: blockdevmap description: - - Map the block device name as defined in AWS/GCP (e.g. /dev/sdf) with the volume provided to the OS + - Map the block device name as defined in AWS/GCP/Azure (e.g. /dev/sdf) with the volume provided to the OS authors: - Dougal Seeley - Amazon.com Inc. 
''' EXAMPLES = ''' -- name: Get block device map information for GCP +- name: Get block device map information for cloud blockdevmap: - cloud_type: gcp - become: yes - register: r__blockdevmap - -- name: Get block device map information for AWS - blockdevmap: - cloud_type: aws + cloud_type: become: yes register: r__blockdevmap @@ -47,6 +42,7 @@ ''' RETURN = ''' +## AWS Nitro "device_map": [ { "FSTYPE": "ext4", @@ -85,6 +81,7 @@ "volume_id": "vol-0b05e48d5677db81a" } +## AWS non-Nitro "device_map": [ { "FSTYPE": "", @@ -108,11 +105,142 @@ "device_name_cloud": "/dev/sda1", "device_name_os": "/dev/xvda1" } + +## AZURE +"device_map": [ + { + "FSTYPE": "", + "HCTL": "0:0:0:0", + "MODEL": "Virtual Disk", + "MOUNTPOINT": "", + "NAME": "sda", + "SERIAL": "6002248071748569390b23178109d35e", + "SIZE": "32212254720", + "TYPE": "disk", + "UUID": "", + "device_name_cloud": "ROOTDISK", + "device_name_os": "/dev/sda", + "parttable_type": "gpt" + }, + { + "FSTYPE": "xfs", + "HCTL": "", + "MODEL": "", + "MOUNTPOINT": "/boot", + "NAME": "sda1", + "SERIAL": "", + "SIZE": "524288000", + "TYPE": "part", + "UUID": "8bd4ad1d-13a7-4bb1-a40c-b05444f11db3", + "device_name_cloud": "", + "device_name_os": "/dev/sda1", + "parttable_type": "gpt" + }, + { + "FSTYPE": "", + "HCTL": "", + "MODEL": "", + "MOUNTPOINT": "", + "NAME": "sda14", + "SERIAL": "", + "SIZE": "4194304", + "TYPE": "part", + "UUID": "", + "device_name_cloud": "", + "device_name_os": "/dev/sda14", + "parttable_type": "gpt" + }, + { + "FSTYPE": "vfat", + "HCTL": "", + "MODEL": "", + "MOUNTPOINT": "/boot/efi", + "NAME": "sda15", + "SERIAL": "", + "SIZE": "519045632", + "TYPE": "part", + "UUID": "F5EB-013D", + "device_name_cloud": "", + "device_name_os": "/dev/sda15", + "parttable_type": "gpt" + }, + { + "FSTYPE": "xfs", + "HCTL": "", + "MODEL": "", + "MOUNTPOINT": "/", + "NAME": "sda2", + "SERIAL": "", + "SIZE": "31161581568", + "TYPE": "part", + "UUID": "40a878b6-3fe8-4336-820a-951a19f79a76", + "device_name_cloud": "", + "device_name_os": "/dev/sda2", + "parttable_type": "gpt" + }, + { + "FSTYPE": "", + "HCTL": "0:0:0:1", + "MODEL": "Virtual Disk", + "MOUNTPOINT": "", + "NAME": "sdb", + "SERIAL": "60022480c891da018bdd14b5dd1895b0", + "SIZE": "4294967296", + "TYPE": "disk", + "UUID": "", + "device_name_cloud": "RESOURCEDISK", + "device_name_os": "/dev/sdb", + "parttable_type": "dos" + }, + { + "FSTYPE": "ext4", + "HCTL": "", + "MODEL": "", + "MOUNTPOINT": "/mnt/resource", + "NAME": "sdb1", + "SERIAL": "", + "SIZE": "4292870144", + "TYPE": "part", + "UUID": "95192b50-0c76-4a03-99a7-67fdc225504f", + "device_name_cloud": "", + "device_name_os": "/dev/sdb1", + "parttable_type": "dos" + }, + { + "FSTYPE": "", + "HCTL": "1:0:0:0", + "MODEL": "Virtual Disk", + "MOUNTPOINT": "", + "NAME": "sdc", + "SERIAL": "60022480b71fde48d1f2212130abc54e", + "SIZE": "1073741824", + "TYPE": "disk", + "UUID": "", + "device_name_cloud": "0", + "device_name_os": "/dev/sdc", + "parttable_type": "" + }, + { + "FSTYPE": "", + "HCTL": "1:0:0:1", + "MODEL": "Virtual Disk", + "MOUNTPOINT": "", + "NAME": "sdd", + "SERIAL": "60022480aa9c0d340c125a5295ee678d", + "SIZE": "1073741824", + "TYPE": "disk", + "UUID": "", + "device_name_cloud": "1", + "device_name_os": "/dev/sdd", + "parttable_type": "" + } +] ''' from ctypes import * from fcntl import ioctl import subprocess +import os import sys import json import re @@ -219,8 +347,8 @@ def __init__(self, module, **kwds): def get_lsblk(self): # Get all existing block volumes by key=value, then parse this into a dictionary (which excludes non 
disk and partition block types, e.g. ram, loop). Cannot use the --json output as it not supported on older versions of lsblk (e.g. CentOS 7) - lsblk_devices = subprocess.check_output(['lsblk', '-o', 'NAME,TYPE,UUID,FSTYPE,MOUNTPOINT,MODEL,SERIAL,SIZE', '-P', '-b']).decode().rstrip().split('\n') - os_device_names = [dict((map(lambda x: x.strip("\""), sub.split("="))) for sub in dev.split('\" ') if '=' in sub) for dev in lsblk_devices] + lsblk_devices = subprocess.check_output(['lsblk', '-o', 'NAME,TYPE,UUID,FSTYPE,MOUNTPOINT,MODEL,SERIAL,SIZE,HCTL', '-P', '-b']).decode().rstrip().split('\n') + os_device_names = [dict((map(lambda x: x.strip("\"").rstrip(), sub.split("="))) for sub in dev.split('\" ') if '=' in sub) for dev in lsblk_devices] os_device_names = [dev for dev in os_device_names if dev['TYPE'] in ['disk', 'part', 'lvm']] os_device_names.sort(key=lambda k: k['NAME']) @@ -244,6 +372,26 @@ def __init__(self, **kwds): os_device.update({"device_name_os": "/dev/" + os_device['NAME'], "device_name_cloud": ""}) +class cAzureMapper(cBlockDevMap): + def __init__(self, **kwds): + super(cAzureMapper, self).__init__(**kwds) + + self.device_map = self.get_lsblk() + + # The Azure root and resource disks are symlinked at install time (by cloud-init) to /dev/disk/cloud/azure_[root|resource]. (They are NOT at predictable /dev/sd[a|b] locations) + # Other managed 'azure_datadisk' disks are mapped by udev (/etc/udev/rules.d/66-azure-storage.rules) when attached. + devrootdisk = os.path.basename(os.path.realpath('/dev/disk/cloud/azure_root')) + devresourcedisk = os.path.basename(os.path.realpath('/dev/disk/cloud/azure_resource')) + + for os_device in self.device_map: + os_device.update({"device_name_os": "/dev/" + os_device['NAME']}) + if os_device['NAME'] not in [devrootdisk,devresourcedisk]: + lun = os_device['HCTL'].split(':')[-1] if len(os_device['HCTL']) else "" + os_device.update({"device_name_cloud": lun}) + else: + os_device.update({"device_name_cloud": "ROOTDISK" if os_device['NAME'] in devrootdisk else "RESOURCEDISK"}) + + class cGCPMapper(cBlockDevMap): def __init__(self, **kwds): super(cGCPMapper, self).__init__(**kwds) @@ -279,7 +427,7 @@ def __init__(self, **kwds): self.module.fail_json(msg=os_device_path + ": FileNotFoundError" + str(e)) except TypeError as e: if instance_store_count < len(instance_store_map): - os_device.update({"device_name_os": os_device_path, "device_name_cloud": '/dev/' + instance_store_map[instance_store_count]['ephemeral_map'], "volume_id": dev.get_volume_id()}) + os_device.update({"device_name_os": os_device_path, "device_name_cloud": '/dev/' + instance_store_map[instance_store_count]['ephemeral_map'], "volume_id": instance_store_map[instance_store_count]['ephemeral_id']}) instance_store_count += 1 else: self.module.warn(u"%s is not an EBS device and there is no instance store mapping." 
% os_device_path) @@ -323,11 +471,11 @@ def get_block_device(self, stripped=False): def main(): if not (len(sys.argv) > 1 and sys.argv[1] == "console"): - module = AnsibleModule(argument_spec={"cloud_type": {"type": "str", "required": True, "choices": ['aws', 'gcp', 'lsblk']}}, supports_check_mode=True) + module = AnsibleModule(argument_spec={"cloud_type": {"type": "str", "required": True, "choices": ['aws', 'gcp', 'azure', 'lsblk']}}, supports_check_mode=True) else: # For testing without Ansible (e.g on Windows) class cDummyAnsibleModule(): - params = {"cloud_type": "aws"} + params = {"cloud_type": "azure"} def exit_json(self, changed, **kwargs): print(changed, json.dumps(kwargs, sort_keys=True, indent=4, separators=(',', ': '))) @@ -345,6 +493,8 @@ def fail_json(self, msg): blockdevmap = cAwsMapper(module=module) elif module.params['cloud_type'] == 'gcp': blockdevmap = cGCPMapper(module=module) + elif module.params['cloud_type'] == 'azure': + blockdevmap = cAzureMapper(module=module) elif module.params['cloud_type'] == 'lsblk': blockdevmap = cLsblkMapper(module=module) else: diff --git a/_dependencies/library/blockdevmap_LICENSE b/_dependencies/library/blockdevmap_LICENSE index 7d404386..84260345 100644 --- a/_dependencies/library/blockdevmap_LICENSE +++ b/_dependencies/library/blockdevmap_LICENSE @@ -3,7 +3,7 @@ BSD 3-Clause License -Copyright (c) 2020, Dougal Seeley +Copyright (c) 2021, Dougal Seeley All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/_dependencies/library/blockdevmap_README.md b/_dependencies/library/blockdevmap_README.md index 330f2888..5ede9fb6 100644 --- a/_dependencies/library/blockdevmap_README.md +++ b/_dependencies/library/blockdevmap_README.md @@ -1,13 +1,23 @@ # blockdevmap -This is an Ansible module that is able to map AWS and GCP device names to the host device names. It returns a dictionary, derived from Linux `lsblk`, (augmented in the case of AWS with results from elsewhere). +This is an Ansible module (or python script) that is able to map AWS, GCP and Azure device names to the host device names. It returns a list of dictionaries (per disk), derived from Linux `lsblk`, (augmented in the case of AWS & Azure with disk information from other machine metadata). + +## Output +### Common ++ `lsblk` is run for options: NAME,TYPE,UUID,FSTYPE,MOUNTPOINT,MODEL,SERIAL,SIZE,HCTL (all of which are supported from CentOS7+ and Ubuntu1804+). ++ _NAME_ is always the OS device name, although in some cases symlinked to a `xvd[\d]` name + + For convenience, this is copied to a parameter `device_name_os` ++ A parameter `device_name_cloud` is created that relates to the name the cloud gives to the device when it is created. ### AWS + On AWS 'nitro' instances all EBS mappings are attached to the NVME controller. The nvme mapping is non-deterministic though, so the script uses ioctl commands to query the nvme controller (from a script by Amazon that is present on 'Amazon Linux' machines: `/sbin/ebsnvme-id`. See documentation: https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/nvme-ebs-volumes). -+ For non-nitro EBS mapping, the script enumerates the mapping in the alphanumerical order of the disk device names. This is the correct order except for some very old RHEL/Centos AMIs, which are not supported. ++ For non-nitro EBS mapping, the script enumerates the mapping in the alphanumerical order of the disk device names. This is the correct order except for some very old RHEL/Centos AMIs, which are not supported. 
+ For ephemeral volume mapping, it uses the http://169.254.169.254/latest/meta-data/block-device-mapping/ endpoint. ### GCP -+ GCP device names are user-defined, and appear as entries in the `lsblk` _SERIAL_ column, mapped to the `lsblk` _NAME_ column. ++ GCP device names are user-defined, and appear as entries in the `lsblk` _SERIAL_ column (which is copied for consistency to `device_name_cloud`). + +### Azure ++ Azure LUNs are user-defined, and appear as the last entry in the `lsblk` _HCTL_ column, (which is copied for consistency to `device_name_cloud`). ### lsblk + The script can be run as plain `lsblk` command, where the cloud provider does not include a mapping, and will return the information as a dictionary. For example, the _bytes_ mapped to the _NAME_ field could be cross-checked against the requested disk size to create a mapping. @@ -16,15 +26,9 @@ This is an Ansible module that is able to map AWS and GCP device names to the ho ## Execution This can be run as an Ansible module (needs root): ```yaml -- name: Get block device map information for GCP - blockdevmap: - cloud_type: gcp - become: yes - register: r__blockdevmap - -- name: Get block device map information for AWS +- name: Get block device map information for cloud blockdevmap: - cloud_type: aws + cloud_type: become: yes register: r__blockdevmap diff --git a/_dependencies/library/deprecate_str.py b/_dependencies/library/deprecate_str.py index 1a3821da..09a58000 100644 --- a/_dependencies/library/deprecate_str.py +++ b/_dependencies/library/deprecate_str.py @@ -1,4 +1,5 @@ # Copyright 2020 Dougal Seeley +# BSD 3-Clause License from __future__ import (absolute_import, division, print_function) diff --git a/_dependencies/tasks/main.yml b/_dependencies/tasks/main.yml index 6be99861..14106173 100644 --- a/_dependencies/tasks/main.yml +++ b/_dependencies/tasks/main.yml @@ -52,6 +52,10 @@ - assert: { that: "cluster_vars[buildenv] | json_query(\"hosttype_vars.*.auto_volumes[] | [?contains(`/dev/sdb,/dev/sdc,/dev/sdd,/dev/sde`, device_name) && volume_type!='ephemeral']\") | length == 0", fail_msg: "device_names /dev/sd[b-e] are only allowed for ephemeral volumes in AWS cluster_vars[buildenv].hosttype_vars. Please start non-ephemeral devices at /dev/sdf." 
} when: cluster_vars.type == "aws" + - assert: + that: "'{%- for hosttype in cluster_vars[buildenv].hosttype_vars | dict2items -%}{%- if ('lvmparams' not in hosttype.value and (hosttype.value.auto_volumes | length) == (hosttype.value.auto_volumes | map(attribute='mountpoint') | list | unique | count)) or ('lvmparams' in hosttype.value and (hosttype.value.auto_volumes | map(attribute='mountpoint') | list | unique | count == 1)) -%}{%- else -%}{{hosttype.key}}{%- endif -%}{%- endfor -%}' == ''" + fail_msg: "All volume mountpoints must either be all different, or all the same (in which case, 'lvmparams' must be set)" + - name: Create gcp service account contents file from cluster_vars[buildenv].gcp_service_account_rawtext (unless already defined by user) block: diff --git a/clean/tasks/aws.yml b/clean/tasks/aws.yml new file mode 100644 index 00000000..5078a43f --- /dev/null +++ b/clean/tasks/aws.yml @@ -0,0 +1,36 @@ +--- + +- name: clean/aws | clean vms + block: + - name: clean/aws | Remove instances termination protection + ec2: + aws_access_key: "{{cluster_vars[buildenv].aws_access_key}}" + aws_secret_key: "{{cluster_vars[buildenv].aws_secret_key}}" + region: "{{ cluster_vars.region }}" + state: "{{ item.instance_state }}" + termination_protection: "no" + instance_ids: ["{{ item.instance_id }}"] + with_items: "{{ hosts_to_clean | json_query(\"[].{instance_id:instance_id, instance_state: instance_state}\") | default([]) }}" + + - name: clean/aws | Delete VMs + ec2: + aws_access_key: "{{cluster_vars[buildenv].aws_access_key}}" + aws_secret_key: "{{cluster_vars[buildenv].aws_secret_key}}" + region: "{{ cluster_vars.region }}" + state: "absent" + instance_ids: "{{ hosts_to_clean | json_query(\"[].instance_id\") }}" + wait: true + when: hosts_to_clean | length + + +- name: clean/aws | clean networking (when '-e clean=_all_') + block: + - name: clean/aws | Delete security group + ec2_group: + aws_access_key: "{{cluster_vars[buildenv].aws_access_key}}" + aws_secret_key: "{{cluster_vars[buildenv].aws_secret_key}}" + region: "{{cluster_vars.region}}" + name: "{{ cluster_name }}-sg" + vpc_id: "{{vpc_id}}" + state: absent + when: clean is defined and clean == '_all_' diff --git a/clean/tasks/clean_networking.yml b/clean/tasks/clean_networking.yml deleted file mode 100644 index 471e1ee5..00000000 --- a/clean/tasks/clean_networking.yml +++ /dev/null @@ -1,31 +0,0 @@ ---- - -- name: clean/networking/aws | Delete AWS security group - ec2_group: - name: "{{ cluster_name }}-sg" - region: "{{cluster_vars.region}}" - vpc_id: "{{vpc_id}}" - aws_access_key: "{{cluster_vars[buildenv].aws_access_key}}" - aws_secret_key: "{{cluster_vars[buildenv].aws_secret_key}}" - state: absent - when: cluster_vars.type == "aws" - -- block: - - name: clean/networking/gcp | Delete GCP cluster firewalls - gcp_compute_firewall: - name: "{{ item.name }}" - state: "absent" - auth_kind: "serviceaccount" - service_account_file: "{{gcp_credentials_file}}" - project: "{{cluster_vars[buildenv].vpc_host_project_id}}" - with_items: "{{ cluster_vars.firewall_rules }}" - - - name: clean/networking/gcp | Delete the GCP network (if -e create_gcp_network=true) - gcp_compute_network: - name: "{{cluster_vars[buildenv].vpc_network_name}}" - auth_kind: "serviceaccount" - service_account_file: "{{gcp_credentials_file}}" - project: "{{cluster_vars[buildenv].vpc_host_project_id}}" - state: absent - when: create_gcp_network is defined and create_gcp_network|bool - when: cluster_vars.type == "gcp" diff --git a/clean/tasks/clean_vms.yml 
b/clean/tasks/clean_vms.yml deleted file mode 100644 index bca5e308..00000000 --- a/clean/tasks/clean_vms.yml +++ /dev/null @@ -1,65 +0,0 @@ ---- - -- name: clean/del_vms | hosts_to_clean - debug: msg="{{hosts_to_clean}}" - -- block: - - block: - - name: clean/del_vms/aws | Remove EC2 instances termination protection - ec2: - aws_access_key: "{{cluster_vars[buildenv].aws_access_key}}" - aws_secret_key: "{{cluster_vars[buildenv].aws_secret_key}}" - region: "{{ cluster_vars.region }}" - state: "{{ item.instance_state }}" - termination_protection: "no" - instance_ids: ["{{ item.instance_id }}"] - with_items: "{{ hosts_to_clean | json_query(\"[].{instance_id:instance_id, instance_state: instance_state}\") | default([]) }}" - - - name: clean/del_vms/aws | Delete EC2 instances - ec2: - aws_access_key: "{{cluster_vars[buildenv].aws_access_key}}" - aws_secret_key: "{{cluster_vars[buildenv].aws_secret_key}}" - region: "{{ cluster_vars.region }}" - state: "absent" - instance_ids: "{{ hosts_to_clean | json_query(\"[].instance_id\") }}" - wait: true - when: cluster_vars.type == "aws" - - - block: - - name: clean/del_vms/gcp | Remove deletion protection - command: "gcloud compute instances update {{item.name}} --no-deletion-protection --zone {{ item.regionzone }}" - when: cluster_vars[buildenv].deletion_protection | bool - with_items: "{{ hosts_to_clean }}" - -# - name: clean/del_vms/gcp | Remove deletion protection (broken until https://github.com/ansible-collections/ansible_collections_google/pull/163 gets into a release) -# gcp_compute_instance: -# name: "{{item.name}}" -# project: "{{cluster_vars[buildenv].vpc_project_id}}" -# zone: "{{ item.regionzone }}" -# auth_kind: "serviceaccount" -# service_account_file: "{{gcp_credentials_file}}" -# deletion_protection: 'no' -# with_items: "{{ hosts_to_clean }}" - - - name: clean/del_vms/gcp | Delete GCE VM - gcp_compute_instance: - name: "{{item.name}}" - project: "{{cluster_vars[buildenv].vpc_project_id}}" - zone: "{{ item.regionzone }}" - auth_kind: "serviceaccount" - service_account_file: "{{gcp_credentials_file}}" - state: "absent" - with_items: "{{ hosts_to_clean }}" - register: r__gcp_compute_instance - async: 7200 - poll: 0 - - - name: clean/del_vms/gcp | Wait for GCE VM deletion to complete - async_status: - jid: "{{ item.ansible_job_id }}" - register: async_jobs - until: async_jobs.finished - retries: 300 - with_items: "{{r__gcp_compute_instance.results}}" - when: cluster_vars.type == "gcp" - when: hosts_to_clean | length \ No newline at end of file diff --git a/clean/tasks/clean_dns.yml b/clean/tasks/dns.yml similarity index 97% rename from clean/tasks/clean_dns.yml rename to clean/tasks/dns.yml index 0de6977a..075e722a 100644 --- a/clean/tasks/clean_dns.yml +++ b/clean/tasks/dns.yml @@ -4,7 +4,7 @@ debug: msg="{{hosts_to_clean}}" - block: - - name: clean/dns/nsupdate | Delete DNS entries from bind (nsupdate) + - name: clean/dns/nsupdate | Delete DNS entries block: - name: clean/dns/nsupdate | Delete A records nsupdate: @@ -33,7 +33,7 @@ when: (item.name + '.' + cluster_vars.dns_user_domain + "." 
== cname_value) when: cluster_vars.dns_server == "nsupdate" - - name: clean/dns/route53 | Delete DNS entries from route53 + - name: clean/dns/route53 | Delete DNS entries block: - name: clean/dns/route53 | Get A records route53: @@ -90,7 +90,7 @@ when: (item.1.set.value is defined) and ((item.0.name | regex_replace('-(?!.*-).*')) == (item.1.set.record | regex_replace('^(.*?)\\..*$', '\\1'))) and (item.0.name == item.1.set.value | regex_replace('^(.*?)\\..*$', '\\1')) when: cluster_vars.dns_server == "route53" - - name: clean/dns/clouddns | Delete DNS entries from clouddns + - name: clean/dns/clouddns | Delete DNS entries block: - name: clean/dns/clouddns | Get managed zone(s) gcp_dns_managed_zone_info: diff --git a/clean/tasks/gcp.yml b/clean/tasks/gcp.yml new file mode 100644 index 00000000..d20ea3a7 --- /dev/null +++ b/clean/tasks/gcp.yml @@ -0,0 +1,62 @@ +--- + +- name: clean/gcp | clean vms + block: + - name: clean/gcp | Remove deletion protection + command: "gcloud compute instances update {{item.name}} --no-deletion-protection --zone {{ item.regionzone }}" + when: cluster_vars[buildenv].deletion_protection | bool + with_items: "{{ hosts_to_clean }}" + + #- name: clean/gcp | Remove deletion protection (broken until https://github.com/ansible-collections/ansible_collections_google/pull/163 gets into a release) + # gcp_compute_instance: + # name: "{{item.name}}" + # project: "{{cluster_vars[buildenv].vpc_project_id}}" + # zone: "{{ item.regionzone }}" + # auth_kind: "serviceaccount" + # service_account_file: "{{gcp_credentials_file}}" + # deletion_protection: 'no' + # with_items: "{{ hosts_to_clean }}" + + - name: clean/gcp | Delete VMs + gcp_compute_instance: + name: "{{item.name}}" + project: "{{cluster_vars[buildenv].vpc_project_id}}" + zone: "{{ item.regionzone }}" + auth_kind: "serviceaccount" + service_account_file: "{{gcp_credentials_file}}" + state: "absent" + with_items: "{{ hosts_to_clean }}" + register: r__gcp_compute_instance + async: 7200 + poll: 0 + + - name: clean/gcp | Wait for VM deletion to complete + async_status: + jid: "{{ item.ansible_job_id }}" + register: async_jobs + until: async_jobs.finished + retries: 300 + with_items: "{{r__gcp_compute_instance.results}}" + when: hosts_to_clean | length + + +- name: clean/gcp | clean networking (when '-e clean=_all_') + block: + - name: clean/networking/gcp | Delete GCP cluster firewalls + gcp_compute_firewall: + name: "{{ item.name }}" + state: "absent" + auth_kind: "serviceaccount" + service_account_file: "{{gcp_credentials_file}}" + project: "{{cluster_vars[buildenv].vpc_host_project_id}}" + with_items: "{{ cluster_vars.firewall_rules }}" + + - name: clean/gcp | Delete the GCP network (if -e create_gcp_network=true) + gcp_compute_network: + name: "{{cluster_vars[buildenv].vpc_network_name}}" + auth_kind: "serviceaccount" + service_account_file: "{{gcp_credentials_file}}" + project: "{{cluster_vars[buildenv].vpc_host_project_id}}" + state: absent + when: create_gcp_network is defined and create_gcp_network|bool + when: clean is defined and clean == '_all_' diff --git a/clean/tasks/main.yml b/clean/tasks/main.yml index 786b8c24..8c265565 100644 --- a/clean/tasks/main.yml +++ b/clean/tasks/main.yml @@ -2,16 +2,15 @@ - name: "Clean the cluster of VMs with lifecycle_state = {{clean}}" block: + - name: clean | hosts_to_clean + debug: msg={{hosts_to_clean}} + - name: clean | Delete DNS - include_tasks: clean_dns.yml + include_tasks: dns.yml when: (cluster_vars.dns_server is defined and cluster_vars.dns_server != "") and 
(cluster_vars.dns_user_domain is defined and cluster_vars.dns_user_domain != "") - - name: clean | Delete VMs - include_tasks: clean_vms.yml - - - name: clean | Delete Networking - include_tasks: clean_networking.yml - when: clean == '_all_' + - name: "clean | {{cluster_vars.type}}" + include_tasks: "{{cluster_vars.type}}.yml" vars: hosts_to_clean: | {%- if clean == '_all_' -%} diff --git a/cluster_hosts/tasks/get_cluster_hosts_state.yml b/cluster_hosts/tasks/get_cluster_hosts_state.yml deleted file mode 100644 index e2cb9023..00000000 --- a/cluster_hosts/tasks/get_cluster_hosts_state.yml +++ /dev/null @@ -1,57 +0,0 @@ ---- - -- name: get_cluster_hosts_state/aws | Get AWS cluster_hosts_state - block: - - name: get_cluster_hosts_state/aws | Get existing EC2 instance info - ec2_instance_info: - filters: - "tag:cluster_name": "{{cluster_name}}" - "instance-state-name": ["running", "stopped"] - aws_access_key: "{{cluster_vars[buildenv].aws_access_key}}" - aws_secret_key: "{{cluster_vars[buildenv].aws_secret_key}}" - region: "{{cluster_vars.region}}" - register: r__ec2_instance_info - delegate_to: localhost - run_once: true - - - name: get_cluster_hosts_state/aws | Set cluster_hosts_state - set_fact: - cluster_hosts_state: "{{r__ec2_instance_info.instances | json_query(\"[].{name: tags.Name, regionzone: placement.availability_zone, tagslabels: tags, instance_id: instance_id, instance_state: state.name}\") }}" - when: cluster_vars.type == "aws" - -- name: get_cluster_hosts_state/gcp | Get GCP cluster_hosts_state - block: - - name: get_cluster_hosts_state/gcp | Get existing GCE instance info (per AZ) - gcp_compute_instance_info: - zone: "{{cluster_vars.region}}-{{item}}" - filters: - - "labels.cluster_name = {{cluster_name}}" - project: "{{cluster_vars[buildenv].vpc_project_id}}" - auth_kind: "serviceaccount" - service_account_file: "{{gcp_credentials_file}}" - scopes: ["https://www.googleapis.com/auth/compute.readonly"] - with_items: "{{ cluster_vars[buildenv].hosttype_vars | json_query(\"*[vms_by_az][][keys(@)][][]\") | unique }}" - register: r__gcp_compute_instance_info - delegate_to: localhost - run_once: true - - - name: get_cluster_hosts_state/gcp | Set cluster_hosts_state with correct regionzone (remove url) - set_fact: - cluster_hosts_state: | - {% set res = _cluster_hosts_state__urlregion -%} - {%- for cluster_host in res -%} - {%- set _ = cluster_host.update({'regionzone': cluster_host.regionzone | regex_replace('^.*/(.*)$', '\\1') }) -%} - {%- endfor -%} - {{ res }} - vars: - _cluster_hosts_state__urlregion: "{{r__gcp_compute_instance_info.results | json_query(\"[?resources[?labels]].resources[].{name: name, regionzone: zone, tagslabels: labels, instance_id: id, instance_state: status}\") }}" - when: cluster_vars.type == "gcp" - - -- name: get_cluster_hosts_state | cluster_hosts_state - debug: msg="{{cluster_hosts_state}}" - delegate_to: localhost - run_once: true - when: cluster_hosts_state is defined - -#- pause: \ No newline at end of file diff --git a/cluster_hosts/tasks/get_cluster_hosts_state_aws.yml b/cluster_hosts/tasks/get_cluster_hosts_state_aws.yml new file mode 100644 index 00000000..48bee157 --- /dev/null +++ b/cluster_hosts/tasks/get_cluster_hosts_state_aws.yml @@ -0,0 +1,17 @@ +--- + +- name: get_cluster_hosts_state/aws | Get existing instance info + ec2_instance_info: + filters: + "tag:cluster_name": "{{cluster_name}}" + "instance-state-name": ["running", "stopped"] + aws_access_key: "{{cluster_vars[buildenv].aws_access_key}}" + aws_secret_key: 
"{{cluster_vars[buildenv].aws_secret_key}}" + region: "{{cluster_vars.region}}" + register: r__ec2_instance_info + delegate_to: localhost + run_once: true + +- name: get_cluster_hosts_state/aws | Set cluster_hosts_state + set_fact: + cluster_hosts_state: "{{r__ec2_instance_info.instances | json_query(\"[].{name: tags.Name, regionzone: placement.availability_zone, tagslabels: tags, instance_id: instance_id, instance_state: state.name, ipv4: {private: private_ip_address, public: public_ip_address}, disk_info_cloud: block_device_mappings }\") }}" diff --git a/cluster_hosts/tasks/get_cluster_hosts_state_gcp.yml b/cluster_hosts/tasks/get_cluster_hosts_state_gcp.yml new file mode 100644 index 00000000..38ba26db --- /dev/null +++ b/cluster_hosts/tasks/get_cluster_hosts_state_gcp.yml @@ -0,0 +1,26 @@ +--- + +- name: get_cluster_hosts_state/gcp | Get existing instance info (per AZ) + gcp_compute_instance_info: + zone: "{{cluster_vars.region}}-{{item}}" + filters: + - "labels.cluster_name = {{cluster_name}}" + project: "{{cluster_vars[buildenv].vpc_project_id}}" + auth_kind: "serviceaccount" + service_account_file: "{{gcp_credentials_file}}" + scopes: ["https://www.googleapis.com/auth/compute.readonly"] + with_items: "{{ cluster_vars[buildenv].hosttype_vars | json_query(\"*[vms_by_az][][keys(@)][][]\") | unique }}" + register: r__gcp_compute_instance_info + delegate_to: localhost + run_once: true + +- name: get_cluster_hosts_state/gcp | Set cluster_hosts_state with correct regionzone (remove url) + set_fact: + cluster_hosts_state: | + {% set res = _cluster_hosts_state__urlregion -%} + {%- for cluster_host in res -%} + {%- set _ = cluster_host.update({'regionzone': cluster_host.regionzone | regex_replace('^.*/(.*)$', '\\1') }) -%} + {%- endfor -%} + {{ res }} + vars: + _cluster_hosts_state__urlregion: "{{r__gcp_compute_instance_info.results | json_query(\"[?resources[?labels]].resources[].{name: name, regionzone: zone, tagslabels: labels, instance_id: id, instance_state: status, ipv4: {private: networkInterfaces[0].networkIP, public: networkInterfaces[0].accessConfigs[0].natIP}, disk_info_cloud: disks }\") }}" diff --git a/cluster_hosts/tasks/get_cluster_hosts_target.yml b/cluster_hosts/tasks/get_cluster_hosts_target.yml index d5ca85aa..7fea699f 100644 --- a/cluster_hosts/tasks/get_cluster_hosts_target.yml +++ b/cluster_hosts/tasks/get_cluster_hosts_target.yml @@ -1,123 +1,33 @@ ---- - -# Create an array of dictionaries containing all the hostnames PER-AZ (i.e. 
couchbase-dev-node-a0, couchbase-dev-master-a1, couchbase-dev-master-b0, couchbase-dev-master-b1 etc) to be created: -- name: get_cluster_hosts_target | Create cluster_hosts_target from the cluster definition in cluster_vars - set_fact: - cluster_hosts_target: | - {% set res = [] -%} - {%- for hostttype in cluster_vars[buildenv].hosttype_vars.keys() -%} - {%- for azname in cluster_vars[buildenv].hosttype_vars[hostttype].vms_by_az.keys() -%} - {%- for azcount in range(0,cluster_vars[buildenv].hosttype_vars[hostttype].vms_by_az[azname]|int) -%} - {% set _dummy = res.extend([{ - 'hosttype': hostttype, - 'hostname': cluster_name + '-' + hostttype + '-' + azname + azcount|string + '-' + cluster_suffix|string, - 'az_name': azname|string, - 'flavor': cluster_vars[buildenv].hosttype_vars[hostttype].flavor, - 'auto_volumes': cluster_vars[buildenv].hosttype_vars[hostttype].auto_volumes - }]) -%} - {%- endfor %} - {%- endfor %} - {%- endfor %} - {{ res }} - -- name: get_cluster_hosts_target/aws | AWS-specific modifications to cluster_hosts_target - add subnets. - block: - # Dynamically look up VPC ID by name from aws - - name: get_cluster_hosts_target | Looking up VPC facts to extract ID - ec2_vpc_net_info: - region: "{{ cluster_vars.region }}" - aws_access_key: "{{ cluster_vars[buildenv].aws_access_key }}" - aws_secret_key: "{{ cluster_vars[buildenv].aws_secret_key }}" - filters: - "tag:Name": "{{ cluster_vars[buildenv].vpc_name }}" - register: r__ec2_vpc_net_info - delegate_to: localhost - run_once: true - - - name: get_cluster_hosts_target/aws | Set VPC ID in variable - set_fact: - vpc_id: "{{ r__ec2_vpc_net_info.vpcs[0].id }}" - - - name: get_cluster_hosts_target/aws | Look up proxy subnet facts - ec2_vpc_subnet_info: - region: "{{ cluster_vars.region }}" - aws_access_key: "{{ cluster_vars[buildenv].aws_access_key }}" - aws_secret_key: "{{ cluster_vars[buildenv].aws_secret_key }}" - filters: - vpc-id: "{{ vpc_id }}" - register: r__ec2_vpc_subnet_info - delegate_to: localhost - run_once: true - - - name: get_cluster_hosts_target/aws | Update cluster_hosts_target with subnet_ids - set_fact: - cluster_hosts_target: | - {%- for host in cluster_hosts_target -%} - {%- set subnet_id = r__ec2_vpc_subnet_info | to_json | from_json | json_query('subnets[?starts_with(tags.Name, \'' + cluster_vars[buildenv].vpc_subnet_name_prefix + host.az_name +'\')].subnet_id|[0]') -%} - {%- set _dummy = host.update({'vpc_subnet_id': subnet_id | string}) -%} - {%- endfor %} - {{ cluster_hosts_target }} - - - block: - - name: get_cluster_hosts_target/aws | Get snapshots info - ec2_snapshot_info: - aws_access_key: "{{cluster_vars[buildenv].aws_access_key}}" - aws_secret_key: "{{cluster_vars[buildenv].aws_secret_key}}" - region: "{{ cluster_vars.region }}" - filters: "{{ _snapshot_tags[0] }}" - register: r__ebs_snapshots - delegate_to: localhost - run_once: true - - - name: get_cluster_hosts_target/aws | Assert that number of snapshots eq number of hosts - assert: - that: - - _available_snapshots|length == cluster_hosts_target|length - quiet: true - fail_msg: "There are {{ _available_snapshots|length }} available snapshots and {{ cluster_hosts_target|length }} nodes. Snapshot restore available only to the same infrastructure size." 
- vars: - _available_snapshots: "{{ r__ebs_snapshots.snapshots|json_query('[].snapshot_id') }}" - delegate_to: localhost - run_once: true - - ## [ See github.com/ansible/ansible/issues/27299 for reason for '| to_json | from_json' ] - - name: get_cluster_hosts_target/aws | update cluster_hosts_target with snapshot_id - set_fact: - cluster_hosts_target: | - {%- for host in cluster_hosts_target -%} - {%- set cluster_host_topology = host.hostname | regex_replace('^.*(-.*?-).*$', '\\1') -%} - {%- for vol in host.auto_volumes -%} - {%- set cur_snapshot = r__ebs_snapshots | default([]) | to_json | from_json | json_query('snapshots[?contains(tags.Name, \'' + cluster_host_topology + '\')]') -%} - {%- if cur_snapshot and 'snapshot_tags' in vol.keys() -%} - {%- set _dummy = vol.update({'snapshot': cur_snapshot[0].snapshot_id}) -%} - {%- set _dummy = vol.pop('snapshot_tags') -%} - {%- endif %} - {%- endfor %} - {%- endfor %} - {{ cluster_hosts_target }} - vars: - _snapshot_tags: "{{ cluster_vars[buildenv].hosttype_vars|json_query('*.auto_volumes[].snapshot_tags') }}" - when: _snapshot_tags|length > 0 - when: cluster_vars.type == "aws" - - -- name: get_cluster_hosts_target/gcp | GCP-specific modifications to cluster_hosts_target - block: - - name: get_cluster_hosts_target/gcp | Update cluster_hosts_target auto_volumes with device_name and initialize_params - set_fact: - cluster_hosts_target: |- - {%- for host in cluster_hosts_target -%} - {%- for vol in host.auto_volumes -%} - {%- if 'device_name' not in vol -%} - {%- set _dummy = vol.update({'device_name': host.hostname + '--' + vol.mountpoint | basename }) -%} - {%- set _dummy = vol.update({'initialize_params': {'disk_name': vol.device_name, 'disk_size_gb': vol.volume_size}}) -%} - {%- endif -%} - {%- endfor %} - {%- endfor %} - {{ cluster_hosts_target }} - when: cluster_vars.type == "gcp" - -- name: get_cluster_hosts_target | cluster_hosts_target - debug: msg={{cluster_hosts_target}} - delegate_to: localhost - run_once: true +--- + +# Create an array of dictionaries containing all the hostnames PER-AZ (i.e. 
couchbase-dev-node-a0, couchbase-dev-master-a1, couchbase-dev-master-b0, couchbase-dev-master-b1 etc) to be created: +- name: get_cluster_hosts_target | Create cluster_hosts_target from the cluster definition in cluster_vars + set_fact: + cluster_hosts_target: | + {% set res = [] -%} + {%- for hostttype in cluster_vars[buildenv].hosttype_vars.keys() -%} + {%- for azname in cluster_vars[buildenv].hosttype_vars[hostttype].vms_by_az.keys() -%} + {%- for azcount in range(0,cluster_vars[buildenv].hosttype_vars[hostttype].vms_by_az[azname]|int) -%} + {% set _dummy = res.extend([{ + 'hosttype': hostttype, + 'hostname': cluster_name + '-' + hostttype + '-' + azname|string + azcount|string + '-' + cluster_suffix|string, + 'az_name': azname|string, + 'flavor': cluster_vars[buildenv].hosttype_vars[hostttype].flavor, + 'auto_volumes': cluster_vars[buildenv].hosttype_vars[hostttype].auto_volumes + }]) -%} + {%- endfor %} + {%- endfor %} + {%- endfor %} + {{ res }} + + +- name: get_cluster_hosts_target | Augment with cloud-specific parameters (if necessary) + include: "{{ item }}" + loop: "{{ query('first_found', params) }}" + vars: { params: { files: ["get_cluster_hosts_target_{{cluster_vars.type}}.yml"], skip: true } } + + +- name: get_cluster_hosts_target | cluster_hosts_target + debug: msg={{cluster_hosts_target}} + delegate_to: localhost + run_once: true diff --git a/cluster_hosts/tasks/get_cluster_hosts_target_aws.yml b/cluster_hosts/tasks/get_cluster_hosts_target_aws.yml new file mode 100644 index 00000000..c61f35d4 --- /dev/null +++ b/cluster_hosts/tasks/get_cluster_hosts_target_aws.yml @@ -0,0 +1,78 @@ +--- + +# Dynamically look up VPC ID by name from aws +- name: get_cluster_hosts_target | Looking up VPC facts to extract ID + ec2_vpc_net_info: + region: "{{ cluster_vars.region }}" + aws_access_key: "{{ cluster_vars[buildenv].aws_access_key }}" + aws_secret_key: "{{ cluster_vars[buildenv].aws_secret_key }}" + filters: + "tag:Name": "{{ cluster_vars[buildenv].vpc_name }}" + register: r__ec2_vpc_net_info + delegate_to: localhost + run_once: true + +- name: get_cluster_hosts_target/aws | Set VPC ID in variable + set_fact: + vpc_id: "{{ r__ec2_vpc_net_info.vpcs[0].id }}" + +- name: get_cluster_hosts_target/aws | Look up proxy subnet facts + ec2_vpc_subnet_info: + region: "{{ cluster_vars.region }}" + aws_access_key: "{{ cluster_vars[buildenv].aws_access_key }}" + aws_secret_key: "{{ cluster_vars[buildenv].aws_secret_key }}" + filters: + vpc-id: "{{ vpc_id }}" + register: r__ec2_vpc_subnet_info + delegate_to: localhost + run_once: true + +- name: get_cluster_hosts_target/aws | Update cluster_hosts_target with subnet_ids + set_fact: + cluster_hosts_target: | + {%- for host in cluster_hosts_target -%} + {%- set subnet_id = r__ec2_vpc_subnet_info | to_json | from_json | json_query('subnets[?starts_with(tags.Name, \'' + cluster_vars[buildenv].vpc_subnet_name_prefix + host.az_name +'\')].subnet_id|[0]') -%} + {%- set _dummy = host.update({'vpc_subnet_id': subnet_id | string}) -%} + {%- endfor %} + {{ cluster_hosts_target }} + +- block: + - name: get_cluster_hosts_target/aws | Get snapshots info + ec2_snapshot_info: + aws_access_key: "{{cluster_vars[buildenv].aws_access_key}}" + aws_secret_key: "{{cluster_vars[buildenv].aws_secret_key}}" + region: "{{ cluster_vars.region }}" + filters: "{{ _snapshot_tags[0] }}" + register: r__ebs_snapshots + delegate_to: localhost + run_once: true + + - name: get_cluster_hosts_target/aws | Assert that number of snapshots eq number of hosts + assert: + that: + - 
_available_snapshots|length == cluster_hosts_target|length + quiet: true + fail_msg: "There are {{ _available_snapshots|length }} available snapshots and {{ cluster_hosts_target|length }} nodes. Snapshot restore available only to the same infrastructure size." + vars: + _available_snapshots: "{{ r__ebs_snapshots.snapshots|json_query('[].snapshot_id') }}" + delegate_to: localhost + run_once: true + + ## [ See github.com/ansible/ansible/issues/27299 for reason for '| to_json | from_json' ] + - name: get_cluster_hosts_target/aws | update cluster_hosts_target with snapshot_id + set_fact: + cluster_hosts_target: | + {%- for host in cluster_hosts_target -%} + {%- set cluster_host_topology = host.hostname | regex_replace('^.*(-.*?-).*$', '\\1') -%} + {%- for vol in host.auto_volumes -%} + {%- set cur_snapshot = r__ebs_snapshots | default([]) | to_json | from_json | json_query('snapshots[?contains(tags.Name, \'' + cluster_host_topology + '\')]') -%} + {%- if cur_snapshot and 'snapshot_tags' in vol.keys() -%} + {%- set _dummy = vol.update({'snapshot': cur_snapshot[0].snapshot_id}) -%} + {%- set _dummy = vol.pop('snapshot_tags') -%} + {%- endif %} + {%- endfor %} + {%- endfor %} + {{ cluster_hosts_target }} + vars: + _snapshot_tags: "{{ cluster_vars[buildenv].hosttype_vars|json_query('*.auto_volumes[].snapshot_tags') }}" + when: _snapshot_tags|length > 0 diff --git a/cluster_hosts/tasks/get_cluster_hosts_target_gcp.yml b/cluster_hosts/tasks/get_cluster_hosts_target_gcp.yml new file mode 100644 index 00000000..a7494e88 --- /dev/null +++ b/cluster_hosts/tasks/get_cluster_hosts_target_gcp.yml @@ -0,0 +1,14 @@ +--- + +- name: get_cluster_hosts_target/gcp | Update cluster_hosts_target auto_volumes with device_name and initialize_params + set_fact: + cluster_hosts_target: |- + {%- for host in cluster_hosts_target -%} + {%- for vol in host.auto_volumes -%} + {%- if 'device_name' not in vol -%} + {%- set _dummy = vol.update({'device_name': host.hostname + '--' + vol.mountpoint | basename }) -%} + {%- set _dummy = vol.update({'initialize_params': {'disk_name': vol.device_name, 'disk_size_gb': vol.volume_size}}) -%} + {%- endif -%} + {%- endfor %} + {%- endfor %} + {{ cluster_hosts_target }} diff --git a/cluster_hosts/tasks/main.yml b/cluster_hosts/tasks/main.yml index 1e5c95fe..02f6f6db 100644 --- a/cluster_hosts/tasks/main.yml +++ b/cluster_hosts/tasks/main.yml @@ -1,7 +1,13 @@ --- - name: Get the state of the VMs in the cluster - include_tasks: get_cluster_hosts_state.yml + include_tasks: "get_cluster_hosts_state_{{cluster_vars.type}}.yml" + +- name: get_cluster_hosts_state | cluster_hosts_state + debug: msg="{{cluster_hosts_state}}" + delegate_to: localhost + run_once: true + when: cluster_hosts_state is defined - assert: that: diff --git a/config/tasks/disks_auto_aws_gcp.yml b/config/tasks/disks_auto_aws_gcp.yml index 2186852d..a1452cf6 100644 --- a/config/tasks/disks_auto_aws_gcp.yml +++ b/config/tasks/disks_auto_aws_gcp.yml @@ -1,7 +1,7 @@ --- - name: disks_auto_aws_gcp | cluster_hosts_target(inventory_hostname) - debug: msg={{ cluster_hosts_target | json_query(\"[?hostname == `\" + inventory_hostname + \"`] \") }} + debug: msg={{ cluster_hosts_target | json_query(\"[?hostname == '\" + inventory_hostname + \"'] \") }} - name: disks_auto_aws_gcp | Mount block devices as individual disks block: @@ -24,7 +24,7 @@ dev: "{{ _dev }}" loop: "{{auto_vols}}" vars: - _dev: "{{ r__blockdevmap.device_map | json_query(\"[?device_name_cloud == `\" + item.device_name + \"` && TYPE==`disk` && parttable_type==`` && 
FSTYPE==`` && MOUNTPOINT==``].device_name_os | [0]\") }}" + _dev: "{{ r__blockdevmap.device_map | json_query(\"[?device_name_cloud == '\" + item.device_name + \"' && TYPE=='disk' && parttable_type=='' && FSTYPE=='' && MOUNTPOINT==''].device_name_os | [0]\") }}" when: _dev is defined and _dev != '' - name: disks_auto_aws_gcp | Get the block device information (post-filesystem create), to get the block IDs for mounting @@ -46,7 +46,7 @@ opts: _netdev loop: "{{auto_vols}}" vars: - _UUID: "{{ r__blockdevmap.device_map | json_query(\"[?device_name_cloud == `\" + item.device_name + \"` && TYPE==`disk` && parttable_type==`` && MOUNTPOINT==``].UUID | [0]\") }}" + _UUID: "{{ r__blockdevmap.device_map | json_query(\"[?device_name_cloud == '\" + item.device_name + \"' && TYPE=='disk' && parttable_type=='' && MOUNTPOINT==''].UUID | [0]\") }}" when: _UUID is defined and _UUID != '' - name: disks_auto_aws_gcp | change ownership of mountpoint (if set) @@ -59,49 +59,60 @@ group: "{{ item.perms.group | default(omit)}}" loop: "{{auto_vols}}" - - block: - - name: disks_auto_aws_gcp | Touch a file with the mountpoint and device name for testing that disk attachment is correct + - name: disks_auto_aws_gcp | Check that we haven't mounted disks in the wrong place. Especially useful for redeploys when we're moving disks. + block: + - name: "disks_auto_aws_gcp | Touch a file with the mountpoint and device name for testing that disk attachment is correct. Note: Use a unique filename here instead of writing to a file, so that more than one file per device is an error." become: yes file: - path: "{{item.mountpoint}}/__clusterversetest_{{ item.mountpoint | regex_replace('\\/', '_') }}_{{ item.device_name | regex_replace('\/', '_') }}" + path: "{{item.mountpoint}}/.clusterversetest__{{inventory_hostname | regex_replace('-(?!.*-).*')}}__{{ item.mountpoint | regex_replace('\\/', '_') }}__{{ item.device_name | regex_replace('\/', '_') }}" state: touch loop: "{{auto_vols}}" - - name: disks_auto_aws_gcp | Find all __clusterversetest_ files in newly mounted disks + - name: disks_auto_aws_gcp | Find all .clusterversetest__ files in mounted disks find: paths: "{{item.mountpoint}}" - patterns: "__clusterversetest_*" + hidden: yes + patterns: ".clusterversetest__*" loop: "{{auto_vols}}" register: r__find_test - - name: disks_auto_aws_gcp | Display all __clusterversetest_ files in newly mounted disks. - debug: - msg: "{{ r__find_test | json_query(\"results[].{device_name: item.device_name, mountpoint: item.mountpoint, files: files[].path}\") }}" + - name: disks_auto_aws_gcp | Check that there is only one .clusterversetest__ file per device in mounted disks. + block: + - name: disks_auto_aws_gcp | testdevicedescriptor + debug: msg={{testdevicedescriptor}} + + - name: disks_auto_aws_gcp | assert that only one device descriptor file exists per disk (otherwise, indicates that this run has mapped either more than one device per mount, or a different one to previous) + assert: { that: "testdevicedescriptor | json_query(\"[?length(files) > `1`]\") | length == 0", fail_msg: "ERROR - only a single file should exist per storage device. 
In error [{{testdevicedescriptor | json_query(\"[?length(files) > `1`]\")}}]" }
+      vars:
+        testdevicedescriptor: "{{ r__find_test | json_query(\"results[].{hostname: '\" + inventory_hostname + \"', device_name: item.device_name, mountpoint: item.mountpoint, files: files[].path}\") }}"
     when: test_touch_disks is defined and test_touch_disks|bool
   when: (auto_vols | map(attribute='mountpoint') | list | unique | count == auto_vols | map(attribute='mountpoint') | list | count)
   vars:
-    auto_vols: "{{ cluster_hosts_target | json_query(\"[?hostname == `\" + inventory_hostname + \"`].auto_volumes[]\") }}"
+    auto_vols: "{{ cluster_hosts_target | json_query(\"[?hostname == '\" + inventory_hostname + \"'].auto_volumes[]\") }}"
 
 
 # The following block mounts all attached volumes that have a single, common mountpoint, by creating a logical volume
-- name: disks_auto_aws_gcp | Mount block devices in a single LVM mountpoint through LV/VG
+- name: disks_auto_aws_gcp/lvm | Mount block devices in a single LVM mountpoint through LV/VG
   block:
-  - name: disks_auto_aws_gcp | Install logical volume management tooling. (yum - RedHat/CentOS)
+  - name: disks_auto_aws_gcp/lvm | hosttype_vars
+    debug: msg={{ hosttype_vars }}
+
+  - name: disks_auto_aws_gcp/lvm | Install logical volume management tooling. (yum - RedHat/CentOS)
     become: true
     yum:
       name: "lvm*"
       state: present
     when: ansible_os_family == 'RedHat'
 
-  - name: disks_auto_aws_gcp | Get the device information (pre-filesystem create)
+  - name: disks_auto_aws_gcp/lvm | Get the device information (pre-filesystem create)
     blockdevmap:
     become: yes
     register: r__blockdevmap
 
-  - name: disks_auto_aws_gcp | r__blockdevmap (pre-filesystem create)
+  - name: disks_auto_aws_gcp/lvm | r__blockdevmap (pre-filesystem create)
     debug: msg={{r__blockdevmap}}
 
-  - name: disks_auto_aws_gcp | Create a volume group from all block devices
+  - name: disks_auto_aws_gcp/lvm | Create a volume group from all block devices
     become: yes
     lvg:
       vg: "{{ hosttype_vars.lvmparams.vg_name }}"
@@ -109,21 +120,21 @@
     vars:
       auto_vol_device_names: "{{hosttype_vars.auto_volumes | map(attribute='device_name') | sort | join(',')}}"
 
-  - name: disks_auto_aws_gcp | Create a logical volume from volume group
+  - name: disks_auto_aws_gcp/lvm | Create a logical volume from volume group
     become: yes
     lvol:
       vg: "{{ hosttype_vars.lvmparams.vg_name }}"
       lv: "{{ hosttype_vars.lvmparams.lv_name }}"
       size: "{{ hosttype_vars.lvmparams.lv_size }}"
 
-  - name: disks_auto_aws_gcp | Create filesystem(s) on attached volume(s)
+  - name: disks_auto_aws_gcp/lvm | Create filesystem(s) on attached volume(s)
     become: yes
     filesystem:
       fstype: "{{ hosttype_vars.auto_volumes[0].fstype }}"
       dev: "/dev/{{ hosttype_vars.lvmparams.vg_name }}/{{ hosttype_vars.lvmparams.lv_name }}"
       force: no
 
-  - name: disks_auto_aws_gcp | Mount created filesytem(s) persistently
+  - name: disks_auto_aws_gcp/lvm | Mount created filesystem(s) persistently
     become: yes
     mount:
       path: "{{ hosttype_vars.auto_volumes[0].mountpoint }}"
@@ -132,23 +143,31 @@
       state: mounted
       opts: _netdev
 
-  - block:
-    - name: disks_auto_aws_gcp | Touch a file with the mountpoint and device name for testing that disk attachment is correct
+  - name: disks_auto_aws_gcp/lvm | Check that we haven't mounted disks in the wrong place. Especially useful for redeploys when we're moving disks.
+    block:
+    - name: "disks_auto_aws_gcp/lvm | Touch a file with the mountpoint for testing that disk attachment is correct. Note: Use a unique filename here instead of writing to a file, so that more than one file per device is an error."
       become: yes
       file:
-        path: "{{ hosttype_vars.auto_volumes[0].mountpoint }}/__clusterversetest_{{ hosttype_vars.auto_volumes[0].mountpoint | regex_replace('\\/', '_') }}"
+        path: "{{ hosttype_vars.auto_volumes[0].mountpoint }}/.clusterversetest__{{inventory_hostname | regex_replace('-(?!.*-).*')}}__{{ hosttype_vars.auto_volumes[0].mountpoint | regex_replace('\\/', '_') }}"
        state: touch
 
-    - name: disks_auto_aws_gcp | Find all __clusterversetest_ files in newly mounted disks
+    - name: disks_auto_aws_gcp/lvm | Find all .clusterversetest__ files in mounted disks
       find:
         paths: "{{ hosttype_vars.auto_volumes[0].mountpoint }}"
-        patterns: "__clusterversetest_*"
+        hidden: yes
+        patterns: ".clusterversetest__*"
       register: r__find_test
 
-    - name: disks_auto_aws_gcp | Display all __clusterversetest_ files in newly mounted disks.
-      debug:
-        msg: "{{ r__find_test | json_query(\"files[].path\") }}"
+    - name: disks_auto_aws_gcp/lvm | Check that there is only one .clusterversetest__ file per device in mounted disks.
+      block:
+        - name: disks_auto_aws_gcp/lvm | testdevicedescriptor
+          debug: msg={{testdevicedescriptor}}
+
+        - name: disks_auto_aws_gcp/lvm | assert that only one device descriptor file exists per disk (otherwise, indicates that this run has mapped either more than one device per mount, or a different one to previous)
+          assert: { that: "testdevicedescriptor | json_query(\"[?length(files) > `1`]\") | length == 0", fail_msg: "ERROR - only a single file should exist per storage device. In error [{{testdevicedescriptor | json_query(\"[?length(files) > `1`]\")}}]" }
+      vars:
+        testdevicedescriptor: "{{ [{'hostname': inventory_hostname, 'mountpoint': hosttype_vars.auto_volumes[0].mountpoint, 'files': r__find_test.files | map(attribute='path') | list}] }}"
     when: test_touch_disks is defined and test_touch_disks|bool
   when: ('lvmparams' in hosttype_vars) and (hosttype_vars.auto_volumes | map(attribute='mountpoint') | list | unique | count == 1) and (hosttype_vars.auto_volumes | map(attribute='mountpoint') | list | count >= 2) and (hosttype_vars.auto_volumes | map(attribute='fstype') | list | unique | count == 1)
   vars:
-    hosttype_vars: "{{ cluster_hosts_target | json_query(\"[?hostname == `\" + inventory_hostname + \"`]\") }}"
+    hosttype_vars: "{{ cluster_hosts_target | json_query(\"[?hostname == '\" + inventory_hostname + \"']\") }}"
diff --git a/create/tasks/aws.yml b/create/tasks/aws.yml
index 77a3ee2e..7071d6cf 100644
--- a/create/tasks/aws.yml
+++ b/create/tasks/aws.yml
@@ -3,7 +3,7 @@
 - name: cluster_hosts_target_denormalised_by_volume
   debug: msg="{{cluster_hosts_target_denormalised_by_volume}}"
 
-- name: create/aws | Create AWS security group
+- name: create/aws | Create security groups
   ec2_group:
     name: "{{ cluster_name }}-sg"
     description: "{{ cluster_name }} rules"
diff --git a/create/tasks/gcp.yml b/create/tasks/gcp.yml
index 716ee14e..23bac4d2 100644
--- a/create/tasks/gcp.yml
+++ b/create/tasks/gcp.yml
@@ -1,8 +1,8 @@
 ---
 
-- name: create/gcp | Create GCP network and subnetwork (if -e create_gcp_network=true)
+- name: create/gcp | Create network and subnetwork (if -e create_gcp_network=true)
   block:
-  - name: create/gcp | Create GCP host network (if -e create_gcp_network=true)
+  - name: create/gcp | Create host network (if -e create_gcp_network=true)
     gcp_compute_network:
       name: "{{cluster_vars[buildenv].vpc_network_name}}"
       auto_create_subnetworks: "{%- if 
cluster_vars[buildenv].vpc_subnet_name is defined and cluster_vars[buildenv].vpc_subnet_name != '' -%} false {%- else -%} true {%- endif -%}" @@ -11,7 +11,7 @@ service_account_file: "{{gcp_credentials_file}}" register: r__gcp_compute_network - - name: create/gcp | Create GCP host subnetwork (if -e create_gcp_network=true) + - name: create/gcp | Create host subnetwork (if -e create_gcp_network=true) gcp_compute_subnetwork: name: "{{cluster_vars[buildenv].vpc_subnet_name}}" network: "{{r__gcp_compute_network}}" @@ -22,9 +22,9 @@ when: create_gcp_network is defined and create_gcp_network|bool -- name: create/gcp | Create GCP firewalls +- name: create/gcp | Create firewalls block: - - name: create/gcp | Get GCP network facts + - name: create/gcp | Get network facts gcp_compute_network_info: filters: - "name = {{cluster_vars[buildenv].vpc_network_name}}" @@ -37,7 +37,7 @@ - name: "Assert that {{cluster_vars[buildenv].vpc_network_name}} network exists" assert: { that: "r__gcp_compute_network_info['resources'] | length > 0", msg: "The {{cluster_vars[buildenv].vpc_network_name}} network must exist (create with ' -e create_gcp_network=true')" } - - name: create/gcp | Get GCP subnetwork facts + - name: create/gcp | Get subnetwork facts gcp_compute_subnetwork_info: filters: - "name = {{cluster_vars[buildenv].vpc_subnet_name}}" @@ -53,7 +53,7 @@ assert: { that: "r__gcp_compute_subnetwork_info['resources'] | length > 0", msg: "The {{cluster_vars[buildenv].vpc_subnet_name}} subnet must exist" } when: (cluster_vars[buildenv].vpc_subnet_name is defined) and (cluster_vars[buildenv].vpc_subnet_name != "") - - name: create/gcp | Create GCP cluster firewalls + - name: create/gcp | Create cluster firewalls gcp_compute_firewall: name: "{{ item.name }}" target_tags: "{{cluster_vars.network_fw_tags}}" @@ -68,7 +68,7 @@ with_items: "{{ cluster_vars.firewall_rules }}" -- name: create/gcp | Create GCP VMs asynchronously and wait for completion +- name: create/gcp | Create VMs asynchronously and wait for completion block: - name: create/gcp | Detach volumes from previous instances (during the _scheme_rmvm_keepdisk_rollback redeploy, we only redeploy one host at a time, and it is already powered off) gce_pd: @@ -82,7 +82,7 @@ name: "{{item.auto_volume.src.source_url | basename}}" loop: "{{ cluster_hosts_target_denormalised_by_volume | selectattr('auto_volume.src', 'defined') | list }}" - - name: create/gcp | Create GCP VMs asynchronously + - name: create/gcp | Create VMs asynchronously gcp_compute_instance: auth_kind: "serviceaccount" service_account_file: "{{gcp_credentials_file}}" diff --git a/dynamic_inventory/tasks/aws.yml b/dynamic_inventory/tasks/aws.yml deleted file mode 100644 index 38713bdd..00000000 --- a/dynamic_inventory/tasks/aws.yml +++ /dev/null @@ -1,23 +0,0 @@ ---- - -- name: dynamic_inventory/aws | Get AWS instance facts - ec2_instance_info: - filters: - "tag:cluster_name": "{{cluster_name}}" - "instance-state-name": ["running"] - aws_access_key: "{{cluster_vars[buildenv].aws_access_key}}" - aws_secret_key: "{{cluster_vars[buildenv].aws_secret_key}}" - region: "{{cluster_vars.region}}" - register: r__ec2_instance_info - delegate_to: localhost - -#- debug: msg={{r__ec2_instance_info}} - -- name: dynamic_inventory/aws | Set dynamic_inventory_flat - set_fact: - dynamic_inventory_flat: | - {%- if cluster_vars.inventory_ip == 'private' -%} - {{ r__ec2_instance_info.instances | json_query('[*].{hosttype: tags.hosttype, hostname: tags.Name, private_ip: private_ip_address, public_ip: public_ip_address, 
inventory_ip: private_ip_address, regionzone: placement.availability_zone}') }} - {%- else -%} - {{ r__ec2_instance_info.instances | json_query('[*].{hosttype: tags.hosttype, hostname: tags.Name, private_ip: private_ip_address, public_ip: public_ip_address, inventory_ip: public_ip_address regionzone: placement.availability_zone}') }} - {%- endif -%} \ No newline at end of file diff --git a/dynamic_inventory/tasks/gcp.yml b/dynamic_inventory/tasks/gcp.yml deleted file mode 100644 index 4b5c46f0..00000000 --- a/dynamic_inventory/tasks/gcp.yml +++ /dev/null @@ -1,33 +0,0 @@ ---- - -# Note: 'scopes' comes from here (https://developers.google.com/identity/protocols/googlescopes#computev1) -- name: dynamic_inventory/gcp | Get GCP instance facts - gcp_compute_instance_info: - zone: "{{cluster_vars.region}}-{{item}}" - filters: - - "name = {{cluster_name}}*" - - "status = RUNNING" # gcloud compute instances list --filter="status=RUNNING" - project: "{{cluster_vars[buildenv].vpc_project_id}}" - auth_kind: "serviceaccount" - service_account_file: "{{gcp_credentials_file}}" - scopes: ["https://www.googleapis.com/auth/compute.readonly"] - with_items: "{{ cluster_vars[buildenv].hosttype_vars | json_query(\"*[vms_by_az][][keys(@)][][]\") | unique }}" - register: r__gcp_compute_instance_info - delegate_to: localhost - run_once: true - -#- debug: msg={{r__gcp_compute_instance_info}} - -- name: dynamic_inventory/gcp | Set dynamic_inventory_flat - set_fact: - dynamic_inventory_flat: | - {% set res = _dynamic_inventory_flat__urlregion -%} - {%- for cluster_var in res -%}{%- set _ = cluster_var.update({'regionzone': cluster_var.regionzone | regex_replace('^.*/(.*)$', '\\1') }) -%}{%- endfor -%} - {{ res }} - vars: - _dynamic_inventory_flat__urlregion: | - {%- if cluster_vars.inventory_ip == 'private' -%} - {{ r__gcp_compute_instance_info.results | json_query('[*].resources[].{hosttype: labels.hosttype, hostname: name, private_ip: networkInterfaces[0].networkIP, public_ip: networkInterfaces[0].accessConfigs[0].natIP, inventory_ip: networkInterfaces[0].networkIP, regionzone: zone}') }} - {%- else -%} - {{ r__gcp_compute_instance_info.results | json_query('[*].resources[].{hosttype: labels.hosttype, hostname: name, private_ip: networkInterfaces[0].networkIP, public_ip: networkInterfaces[0].accessConfigs[0].natIP, inventory_ip: networkInterfaces[0].accessConfigs[0].natIP, regionzone: zone}') }} - {%- endif -%} diff --git a/dynamic_inventory/tasks/main.yml b/dynamic_inventory/tasks/main.yml index d1c57299..4780028a 100644 --- a/dynamic_inventory/tasks/main.yml +++ b/dynamic_inventory/tasks/main.yml @@ -1,30 +1,33 @@ --- -- name: "dynamic_inventory | Derive dynamic inventory for {{cluster_vars.type}} cluster" - include_tasks: "{{cluster_vars.type}}.yml" +- name: "dynamic_inventory | Get cluster_hosts_state for {{cluster_vars.type}} cluster" + include_role: + name: 'clusterverse/cluster_hosts' + tasks_from: "get_cluster_hosts_state_{{cluster_vars.type}}.yml" -- assert: { that: "dynamic_inventory_flat is defined", msg: "dynamic_inventory_flat is not defined" } +- name: dynamic_inventory | assert that cluster_hosts_state is defined + assert: { that: "cluster_hosts_state is defined", msg: "cluster_hosts_state is not defined" } -- name: dynamic_inventory | dynamic_inventory_flat - debug: msg="{{dynamic_inventory_flat}}" +- name: dynamic_inventory | cluster_hosts_state + debug: msg="{{cluster_hosts_state}}" -- name: dynamic_inventory | Refresh (clean it, because there is no file or plugin inventory defined) the in-memory 
inventory prior to building it (this is in case this module is called multiple times, and we otherwise only add hosts to existing inventory) +- name: dynamic_inventory | Refresh the in-memory inventory prior to building it (in this case, empties it, because there is no file or plugin inventory defined). This is in case this module is called multiple times, and we otherwise only add hosts to existing inventory. meta: refresh_inventory -- name: dynamic_inventory | get (only network) facts - to determine the local IP/network +- name: dynamic_inventory | Get (network) facts - to determine the local IP/network, to see if we need the bastion below (requires the 'ip' tool (the 'iproute2' package on Ubuntu)) setup: { gather_subset: ["network"] } - name: dynamic_inventory | Add hosts to dynamic inventory add_host: - name: "{{ item.hostname }}" - groups: "{{ item.hosttype }},{{ cluster_name }},{{ clusterid }}{% if 'regionzone' in item %},{{ item.regionzone }}{% endif %}" - ansible_host: "{{ item.inventory_ip }}" - hosttype: "{{ item.hosttype }}" - regionzone: "{{ item.regionzone | default(omit) }}" + name: "{{ item.name }}" + groups: "{{ item.tagslabels.hosttype }},{{ cluster_name }},{{ clusterid }}{% if item.regionzone is defined and item.regionzone %},{{ item.regionzone }}{% endif %}" + ansible_host: "{{ item.ipv4.public if cluster_vars.inventory_ip=='public' else item.ipv4.private }}" + hosttype: "{{ item.tagslabels.hosttype }}" + regionzone: "{{ item.regionzone if item.regionzone else omit }}" ansible_ssh_common_args: "{{ cluster_vars[buildenv].ssh_connection_cfg.bastion.ssh_args if (_bastion_host and (not _bastion_in_host_net or (force_use_bastion is defined and force_use_bastion|bool))) else (omit) }}" # Don't use the bastion if we're running in the same subnet (assumes all hosts in subnet can operate as a bastion), or if the user sets '-e force_use_bastion=true' ansible_user: "{{ cluster_vars[buildenv].ssh_connection_cfg.host.ansible_user | default(omit) }}" ansible_ssh_private_key_file: "{{ cluster_vars[buildenv].ssh_connection_cfg.host.ansible_ssh_private_key_file | default(None) | ternary('id_rsa_ansible_ssh_private_key_file', omit) }}" - with_items: "{{ dynamic_inventory_flat }}" + with_items: "{{ cluster_hosts_state | json_query(\"[?contains('RUNNING,running,poweredOn', instance_state)]\") }}" vars: _local_cidr: "{{ (ansible_default_ipv4.network+'/'+ansible_default_ipv4.netmask) | ipaddr('network/prefix') }}" # Get the network the localhost IP is in _bastion_host: "{{ cluster_vars[buildenv].ssh_connection_cfg.bastion.ssh_args | default() | regex_replace('.*@([]\\w\\d\\.-]*).*', '\\1') }}" # Extract just the bastion hostname from 'cluster_vars[buildenv].ssh_connection_cfg.bastion.ssh_args' @@ -39,10 +42,10 @@ - name: dynamic_inventory | Populate inventory file from dynamic inventory copy: content: | - {% for groupname in groups.keys() -%} + {% for groupname in groups.keys() | sort() -%} {% if groupname not in ["all", "ungrouped"] -%} [{{ groupname }}] - {% for hostname in groups[groupname] %} + {% for hostname in groups[groupname] | sort() %} {{ hostname }} ansible_host={{hostvars[hostname].ansible_host}} hosttype={{ hostvars[hostname].hosttype }} {% if 'ansible_user' in hostvars[hostname] %}ansible_user='{{ hostvars[hostname].ansible_user }}'{% endif %} {% if 'ansible_ssh_private_key_file' in hostvars[hostname] %}ansible_ssh_private_key_file='{{ hostvars[hostname].ansible_ssh_private_key_file }}'{% endif %} {% if 'regionzone' in hostvars[hostname] %}regionzone={{ 
hostvars[hostname].regionzone }}{% endif %} {% if 'ansible_ssh_common_args' in hostvars[hostname] %}ansible_ssh_common_args='{{ hostvars[hostname].ansible_ssh_common_args }}'{% endif %}{{''}} {% endfor %} diff --git a/jenkinsfiles/Jenkinsfile_testsuite b/jenkinsfiles/Jenkinsfile_testsuite index fad32c37..26b6fab0 100644 --- a/jenkinsfiles/Jenkinsfile_testsuite +++ b/jenkinsfiles/Jenkinsfile_testsuite @@ -119,7 +119,7 @@ class cStageBuild { } } -// A pipeline 'stage' template for clusterverse-ops boilerplace +// A pipeline 'stage' template for clusterverse-ops boilerplate def stage_cvops(String stageLabel, cStageBuild stageBuild, Closure stageExpressions) { stage(stageLabel) { if (stageBuild.result == 'SUCCESS') { diff --git a/readiness/tasks/main.yml b/readiness/tasks/main.yml index a3d57b01..966ef941 100644 --- a/readiness/tasks/main.yml +++ b/readiness/tasks/main.yml @@ -1,7 +1,7 @@ --- - name: readiness | Remove maintenance mode - include_tasks: remove_maintenance_mode.yml + include_tasks: "remove_maintenance_mode_{{cluster_vars.type}}.yml" when: (prometheus_set_unset_maintenance_mode is defined and prometheus_set_unset_maintenance_mode|bool) - name: readiness | create/update DNS CNAME records diff --git a/readiness/tasks/remove_maintenance_mode.yml b/readiness/tasks/remove_maintenance_mode.yml deleted file mode 100644 index 3ecc5fde..00000000 --- a/readiness/tasks/remove_maintenance_mode.yml +++ /dev/null @@ -1,58 +0,0 @@ ---- - -- block: - - name: remove_maintenance_mode/aws | Get existing AWS EC2 instance info - ec2_instance_info: - filters: - "tag:cluster_name": "{{cluster_name}}" - "instance-state-name": ["running", "stopped"] - aws_access_key: "{{cluster_vars[buildenv].aws_access_key}}" - aws_secret_key: "{{cluster_vars[buildenv].aws_secret_key}}" - region: "{{cluster_vars.region}}" - register: r__ec2_instance_info - delegate_to: localhost - run_once: true - - - name: remove_maintenance_mode/aws | Set maintenance_mode to false - ec2_tag: - aws_access_key: "{{cluster_vars[buildenv].aws_access_key}}" - aws_secret_key: "{{cluster_vars[buildenv].aws_secret_key}}" - region: "{{cluster_vars.region}}" - resource: "{{ item }}" - tags: - maintenance_mode: "false" - delegate_to: localhost - run_once: true - with_items: "{{ r__ec2_instance_info.instances | json_query(\"[].instance_id\") }}" - when: cluster_vars.type == "aws" - -- block: - - name: remove_maintenance_mode/gcp | Get existing GCP GCE instance info (per AZ) - gcp_compute_instance_info: - zone: "{{cluster_vars.region}}-{{item}}" - filters: - - "labels.cluster_name = {{cluster_name}}" - project: "{{cluster_vars[buildenv].vpc_project_id}}" - auth_kind: "serviceaccount" - service_account_file: "{{gcp_credentials_file}}" - scopes: ["https://www.googleapis.com/auth/compute.readonly"] - with_items: "{{ cluster_vars[buildenv].hosttype_vars | json_query(\"*[vms_by_az][][keys(@)][][]\") | unique }}" - register: r__gcp_compute_instance_info - delegate_to: localhost - run_once: true - - # Use this because the gce_labels command does not replace existing labels. 
https://github.com/ansible/ansible/pull/59891 - - name: remove_maintenance_mode/gcp | Set maintenance_mode to false - gcp_compute_instance: - name: "{{item.name}}" - project: "{{cluster_vars[buildenv].vpc_project_id}}" - zone: "{{ item.zone | regex_replace('^.*/(.*)$', '\\1') }}" - auth_kind: "serviceaccount" - service_account_file: "{{gcp_credentials_file}}" - deletion_protection: "{{cluster_vars[buildenv].deletion_protection}}" - status: "{{item.status}}" - labels: "{{ item.labels | combine({'maintenance_mode': 'false'}) }}" - with_items: "{{ r__gcp_compute_instance_info.results | json_query(\"[?resources[?labels]].resources[]\") }}" - delegate_to: localhost - run_once: true - when: cluster_vars.type == "gcp" diff --git a/readiness/tasks/remove_maintenance_mode_aws.yml b/readiness/tasks/remove_maintenance_mode_aws.yml new file mode 100644 index 00000000..f000dc9b --- /dev/null +++ b/readiness/tasks/remove_maintenance_mode_aws.yml @@ -0,0 +1,13 @@ +--- + +- name: remove_maintenance_mode/aws | Set maintenance_mode to false + ec2_tag: + aws_access_key: "{{cluster_vars[buildenv].aws_access_key}}" + aws_secret_key: "{{cluster_vars[buildenv].aws_secret_key}}" + region: "{{cluster_vars.region}}" + resource: "{{ item }}" + tags: + maintenance_mode: "false" + delegate_to: localhost + run_once: true + with_items: "{{ cluster_hosts_state | json_query(\"[].instance_id\") }}" diff --git a/readiness/tasks/remove_maintenance_mode_gcp.yml b/readiness/tasks/remove_maintenance_mode_gcp.yml new file mode 100644 index 00000000..4de96816 --- /dev/null +++ b/readiness/tasks/remove_maintenance_mode_gcp.yml @@ -0,0 +1,29 @@ +--- + +# Use this because the gce_labels command does not replace existing labels. https://github.com/ansible/ansible/pull/59891 +- name: remove_maintenance_mode/gcp | Set maintenance_mode=false asynchronously + gcp_compute_instance: + name: "{{item.name}}" + project: "{{cluster_vars[buildenv].vpc_project_id}}" + zone: "{{ item.regionzone | regex_replace('^.*/(.*)$', '\\1') }}" + auth_kind: "serviceaccount" + service_account_file: "{{gcp_credentials_file}}" + deletion_protection: "{{cluster_vars[buildenv].deletion_protection}}" + status: "{{item.instance_state}}" + labels: "{{ item.tagslabels | combine({'maintenance_mode': 'false'}) }}" + register: r__gcp_compute_instance + with_items: "{{ cluster_hosts_state }}" + delegate_to: localhost + run_once: true + async: 7200 + poll: 0 + +- name: remove_maintenance_mode/gcp | Wait for maintenance_mode labelling to finish + async_status: + jid: "{{ item.ansible_job_id }}" + register: async_jobs + until: async_jobs.finished + retries: 300 + with_items: "{{r__gcp_compute_instance.results}}" + delegate_to: localhost + run_once: true diff --git a/redeploy/__common/tasks/powerchange_vms_aws.yml b/redeploy/__common/tasks/powerchange_vms_aws.yml new file mode 100644 index 00000000..f9ad24b6 --- /dev/null +++ b/redeploy/__common/tasks/powerchange_vms_aws.yml @@ -0,0 +1,27 @@ +--- + +- name: "powerchange_vms/aws | hosts_to_powerchange (to {{powerchange_new_state}})" + debug: msg="{{hosts_to_powerchange}}" + +- name: "powerchange_vms/aws | {{powerchange_new_state}} VM(s) and set maintenance_mode=true (if stopping)" + block: + - name: powerchange_vms/aws | Set maintenance_mode=true (if stopping) + ec2_tag: + aws_access_key: "{{cluster_vars[buildenv].aws_access_key}}" + aws_secret_key: "{{cluster_vars[buildenv].aws_secret_key}}" + region: "{{cluster_vars.region}}" + resource: "{{ item.instance_id }}" + tags: { maintenance_mode: "true" } + with_items: "{{ 
hosts_to_powerchange }}" + when: "powerchange_new_state == 'stop'" + + - name: "powerchange_vms/aws | {{powerchange_new_state}} VMs" + ec2_instance: + aws_access_key: "{{cluster_vars[buildenv].aws_access_key}}" + aws_secret_key: "{{cluster_vars[buildenv].aws_secret_key}}" + region: "{{ cluster_vars.region }}" + state: "{% if powerchange_new_state == 'stop' %}stopped{% else %}running{% endif %}" + instance_ids: "{{ hosts_to_powerchange | json_query(\"[].instance_id\") }}" + delegate_to: localhost + run_once: true + when: hosts_to_powerchange | length diff --git a/redeploy/__common/tasks/powerchange_vms_azure.yml b/redeploy/__common/tasks/powerchange_vms_azure.yml new file mode 100644 index 00000000..c49996e9 --- /dev/null +++ b/redeploy/__common/tasks/powerchange_vms_azure.yml @@ -0,0 +1,32 @@ +--- + +- name: "powerchange_vms/azure | hosts_to_powerchange (to {{powerchange_new_state}})" + debug: msg="{{hosts_to_powerchange}}" + +- name: "powerchange_vms/azure | {{powerchange_new_state}} VM(s) and set maintenance_mode=true (if stopping)" + block: + - name: "powerchange_vms/azure | {{powerchange_new_state}} VMs asynchronously and set maintenance_mode=true (if stopping)" + azure.azcollection.azure_rm_virtualmachine: + client_id: "{{cluster_vars[buildenv].azure_client_id}}" + secret: "{{cluster_vars[buildenv].azure_secret}}" + subscription_id: "{{cluster_vars[buildenv].azure_subscription_id}}" + tenant: "{{cluster_vars[buildenv].azure_tenant}}" + resource_group: "{{cluster_vars[buildenv].azure_resource_group}}" + append_tags: yes + name: "{{ item.name }}" + tags: "{% if powerchange_new_state == 'stop' %}{'maintenance_mode': 'true'}{% else %}{{omit}}{% endif %}" + started: "{% if powerchange_new_state == 'stop' %}no{% else %}yes{% endif %}" + zones: ["{{ (item.regionzone.split('-'))[1] }}"] + register: r__azure_rm_virtualmachine + with_items: "{{ hosts_to_powerchange }}" + async: 7200 + poll: 0 + + - name: "powerchange_vms/azure | Wait for VM(s) to {{powerchange_new_state}}" + async_status: + jid: "{{ item.ansible_job_id }}" + register: async_jobs + until: async_jobs.finished + retries: 300 + with_items: "{{r__azure_rm_virtualmachine.results}}" + when: hosts_to_powerchange | length diff --git a/redeploy/__common/tasks/powerchange_vms_esxifree.yml b/redeploy/__common/tasks/powerchange_vms_esxifree.yml new file mode 100644 index 00000000..c62d3593 --- /dev/null +++ b/redeploy/__common/tasks/powerchange_vms_esxifree.yml @@ -0,0 +1,28 @@ +--- + +- name: "powerchange_vms/esxifree | hosts_to_powerchange (to {{powerchange_new_state}})" + debug: msg="{{hosts_to_powerchange}}" + +- name: "powerchange_vms/esxifree | {{powerchange_new_state}} VM(s) and set maintenance_mode=true" + block: + - name: powerchange_vms/esxifree | Set maintenance_mode=true (if stopping) + esxifree_guest: + hostname: "{{ cluster_vars.esxi_ip }}" + username: "{{ cluster_vars.username }}" + password: "{{ cluster_vars.password }}" + name: "{{item.name}}" + state: unchanged + annotation: "{{ item.tagslabels | combine({'maintenance_mode': 'true'}) }}" + with_items: "{{ hosts_to_powerchange }}" + when: "powerchange_new_state == 'stop'" + + - name: "powerchange_vms/esxifree | {{powerchange_new_state}} VMs asynchronously" + esxifree_guest: + hostname: "{{ cluster_vars.esxi_ip }}" + username: "{{ cluster_vars.username }}" + password: "{{ cluster_vars.password }}" + name: "{{item.name}}" + state: "{% if powerchange_new_state == 'stop' %}shutdownguest{% else %}poweredon{% endif %}" + with_items: "{{ hosts_to_powerchange }}" + when: 
hosts_to_powerchange | length + \ No newline at end of file diff --git a/redeploy/__common/tasks/powerchange_vms_gcp.yml b/redeploy/__common/tasks/powerchange_vms_gcp.yml new file mode 100644 index 00000000..6cff2daa --- /dev/null +++ b/redeploy/__common/tasks/powerchange_vms_gcp.yml @@ -0,0 +1,31 @@ +--- + +- name: "powerchange_vms/gcp | hosts_to_powerchange (to {{powerchange_new_state}})" + debug: msg="{{hosts_to_powerchange}}" + +- name: "powerchange_vms/gcp | {{powerchange_new_state}} VM(s) and set maintenance_mode=true" + block: + - name: "powerchange_vms/gcp | {{powerchange_new_state}} VMs asynchronously and set maintenance_mode=true (if stopping)" + gcp_compute_instance: + name: "{{item.name}}" + project: "{{cluster_vars[buildenv].vpc_project_id}}" + zone: "{{ item.regionzone }}" + auth_kind: "serviceaccount" + service_account_file: "{{gcp_credentials_file}}" + deletion_protection: "{{cluster_vars[buildenv].deletion_protection}}" + status: "{% if powerchange_new_state == 'stop' %}TERMINATED{% else %}RUNNING{% endif %}" + labels: "{% if powerchange_new_state == 'stop' %}{{ item.tagslabels | combine({'maintenance_mode': 'true'}) }}{% else %}{{omit}}{% endif %}" + with_items: "{{ hosts_to_powerchange }}" + register: r__gcp_compute_instance + async: 7200 + poll: 0 + + - name: "powerchange_vms/gcp | Wait for VM(s) to {{powerchange_new_state}}" + async_status: + jid: "{{ item.ansible_job_id }}" + register: async_jobs + until: async_jobs.finished + retries: 300 + with_items: "{{r__gcp_compute_instance.results}}" + when: hosts_to_powerchange | length + \ No newline at end of file diff --git a/redeploy/__common/tasks/poweroff_vms.yml b/redeploy/__common/tasks/poweroff_vms.yml deleted file mode 100644 index b6cdad56..00000000 --- a/redeploy/__common/tasks/poweroff_vms.yml +++ /dev/null @@ -1,57 +0,0 @@ ---- - -- name: poweroff_vms | hosts_to_stop - debug: msg="{{hosts_to_stop}}" - -- block: - - name: poweroff_vms | Power-off AWS EC2 VM(s) and set maintenance_mode=true - block: - - name: poweroff_vms | Set maintenance_mode label on AWS VM(s) - ec2_tag: - aws_access_key: "{{cluster_vars[buildenv].aws_access_key}}" - aws_secret_key: "{{cluster_vars[buildenv].aws_secret_key}}" - region: "{{cluster_vars.region}}" - resource: "{{ item.instance_id }}" - tags: - maintenance_mode: "true" - with_items: "{{ hosts_to_stop }}" - - - name: poweroff_vms | Power-off AWS EC2 VM(s) - ec2: - aws_access_key: "{{cluster_vars[buildenv].aws_access_key}}" - aws_secret_key: "{{cluster_vars[buildenv].aws_secret_key}}" - region: "{{ cluster_vars.region }}" - state: "stopped" - instance_ids: "{{ hosts_to_stop | json_query(\"[].instance_id\") }}" - wait: true - delegate_to: localhost - run_once: true - when: cluster_vars.type == "aws" - - - - name: poweroff_vms | Power-off GCP GCE VMs asynchronously - block: - - name: poweroff_vms | Power-off GCP GCE VM(s) and set maintenance_mode=true - gcp_compute_instance: - name: "{{item.name}}" - project: "{{cluster_vars[buildenv].vpc_project_id}}" - zone: "{{ item.regionzone }}" - auth_kind: "serviceaccount" - service_account_file: "{{gcp_credentials_file}}" - deletion_protection: "{{cluster_vars[buildenv].deletion_protection}}" - status: "TERMINATED" - labels: "{{ item.tagslabels | combine({'maintenance_mode': 'true'}) }}" - with_items: "{{ hosts_to_stop }}" - register: r__gcp_compute_instance - async: 7200 - poll: 0 - - - name: poweroff_vms | Wait for GCP GCE instance(s) to power-off - async_status: - jid: "{{ item.ansible_job_id }}" - register: async_jobs - until: 
async_jobs.finished - retries: 300 - with_items: "{{r__gcp_compute_instance.results}}" - when: cluster_vars.type == "gcp" - when: hosts_to_stop | length \ No newline at end of file diff --git a/redeploy/__common/tasks/poweron_vms.yml b/redeploy/__common/tasks/poweron_vms.yml deleted file mode 100644 index a93bd18f..00000000 --- a/redeploy/__common/tasks/poweron_vms.yml +++ /dev/null @@ -1,46 +0,0 @@ ---- - -- name: poweron_vms | hosts_to_start - debug: msg="{{hosts_to_start}}" - -- block: - - name: poweron_vms | Power-on AWS EC2 VM(s) - block: - - name: poweron_vms | Power-on AWS EC2 VM(s) - ec2: - aws_access_key: "{{cluster_vars[buildenv].aws_access_key}}" - aws_secret_key: "{{cluster_vars[buildenv].aws_secret_key}}" - region: "{{ cluster_vars.region }}" - state: "running" - instance_ids: "{{ hosts_to_start | json_query(\"[].instance_id\") }}" - wait: true - delegate_to: localhost - run_once: true - when: cluster_vars.type == "aws" - - - name: poweron_vms | Power-on GCP GCE VM(s) asynchronously - block: - - name: poweron_vms | Power-on GCP GCE VM(s) - gcp_compute_instance: - name: "{{item.name}}" - project: "{{cluster_vars[buildenv].vpc_project_id}}" - zone: "{{ item.regionzone }}" - auth_kind: "serviceaccount" - service_account_file: "{{gcp_credentials_file}}" - deletion_protection: "{{cluster_vars[buildenv].deletion_protection}}" - status: "RUNNING" - labels: "{{ item.tagslabels }}" - with_items: "{{ hosts_to_start }}" - register: r__gcp_compute_instance - async: 7200 - poll: 0 - - - name: poweron_vms | Wait for GCP GCE instance(s) to power-on - async_status: - jid: "{{ item.ansible_job_id }}" - register: async_jobs - until: async_jobs.finished - retries: 300 - with_items: "{{r__gcp_compute_instance.results}}" - when: cluster_vars.type == "gcp" - when: hosts_to_start | length \ No newline at end of file diff --git a/redeploy/__common/tasks/set_lifecycle_state_label.yml b/redeploy/__common/tasks/set_lifecycle_state_label.yml deleted file mode 100644 index d36bd546..00000000 --- a/redeploy/__common/tasks/set_lifecycle_state_label.yml +++ /dev/null @@ -1,31 +0,0 @@ ---- - -- name: set_lifecycle_state_label | hosts_to_relabel - debug: msg="{{hosts_to_relabel}}" - -- block: - - name: set_lifecycle_state_label | Change lifecycle_state label on AWS EC2 VM - ec2_tag: - aws_access_key: "{{cluster_vars[buildenv].aws_access_key}}" - aws_secret_key: "{{cluster_vars[buildenv].aws_secret_key}}" - region: "{{cluster_vars.region}}" - resource: "{{ item.instance_id }}" - tags: - lifecycle_state: "{{new_state}}" - with_items: "{{ hosts_to_relabel }}" - when: cluster_vars.type == "aws" - - - - name: set_lifecycle_state_label | Change lifecycle_state label on GCP GCE VM - gcp_compute_instance: - name: "{{item.name}}" - project: "{{cluster_vars[buildenv].vpc_project_id}}" - zone: "{{ item.regionzone }}" - auth_kind: "serviceaccount" - service_account_file: "{{gcp_credentials_file}}" - deletion_protection: "{{cluster_vars[buildenv].deletion_protection}}" - status: "{{item.instance_state}}" - labels: "{{ item.tagslabels | combine({'lifecycle_state': new_state}) }}" - with_items: "{{ hosts_to_relabel }}" - when: cluster_vars.type == "gcp" - when: hosts_to_relabel | length \ No newline at end of file diff --git a/redeploy/__common/tasks/set_lifecycle_state_label_aws.yml b/redeploy/__common/tasks/set_lifecycle_state_label_aws.yml new file mode 100644 index 00000000..57ea75b4 --- /dev/null +++ b/redeploy/__common/tasks/set_lifecycle_state_label_aws.yml @@ -0,0 +1,14 @@ +--- + +- name: 
set_lifecycle_state_label/aws | hosts_to_relabel
+  debug: msg="{{hosts_to_relabel}}"
+
+- name: "set_lifecycle_state_label/aws | Change lifecycle_state label to {{new_state}}"
+  ec2_tag:
+    aws_access_key: "{{cluster_vars[buildenv].aws_access_key}}"
+    aws_secret_key: "{{cluster_vars[buildenv].aws_secret_key}}"
+    region: "{{cluster_vars.region}}"
+    resource: "{{ item.instance_id }}"
+    tags:
+      lifecycle_state: "{{new_state}}"
+  with_items: "{{ hosts_to_relabel | default([]) }}"
diff --git a/redeploy/__common/tasks/set_lifecycle_state_label_gcp.yml b/redeploy/__common/tasks/set_lifecycle_state_label_gcp.yml
new file mode 100644
index 00000000..93f0986e
--- /dev/null
+++ b/redeploy/__common/tasks/set_lifecycle_state_label_gcp.yml
@@ -0,0 +1,16 @@
+---
+
+- name: set_lifecycle_state_label/gcp | hosts_to_relabel
+  debug: msg="{{hosts_to_relabel}}"
+
+- name: "set_lifecycle_state_label/gcp | Change lifecycle_state label to {{new_state}}"
+  gcp_compute_instance:
+    name: "{{item.name}}"
+    project: "{{cluster_vars[buildenv].vpc_project_id}}"
+    zone: "{{ item.regionzone }}"
+    auth_kind: "serviceaccount"
+    service_account_file: "{{gcp_credentials_file}}"
+    deletion_protection: "{{cluster_vars[buildenv].deletion_protection}}"
+    status: "{{item.instance_state}}"
+    labels: "{{ item.tagslabels | combine({'lifecycle_state': new_state}) }}"
+  with_items: "{{ hosts_to_relabel | default([]) }}"
diff --git a/redeploy/_scheme_addallnew_rmdisk_rollback/tasks/main.yml b/redeploy/_scheme_addallnew_rmdisk_rollback/tasks/main.yml
index 30c5f34f..2c5857cc 100644
--- a/redeploy/_scheme_addallnew_rmdisk_rollback/tasks/main.yml
+++ b/redeploy/_scheme_addallnew_rmdisk_rollback/tasks/main.yml
@@ -27,14 +27,14 @@
     block:
     - assert: { that: "'current' in (cluster_hosts_state | map(attribute='tagslabels.lifecycle_state'))", msg: "ERROR - Cannot tidy when there are no machines in the 'current' lifecycle_state. Please use '-e clean=_all_'." 
} - - import_role: + - include_role: name: clusterverse/clean - tasks_from: clean_dns.yml + tasks_from: dns.yml when: (hosts_to_clean | length) and (cluster_vars.dns_server is defined and cluster_vars.dns_server != "") and (cluster_vars.dns_user_domain is defined and cluster_vars.dns_user_domain != "") - - import_role: + - include_role: name: clusterverse/clean - tasks_from: clean_vms.yml + tasks_from: "{{cluster_vars.type}}.yml" when: (hosts_to_clean | length) - debug: diff --git a/redeploy/_scheme_addallnew_rmdisk_rollback/tasks/redeploy.yml b/redeploy/_scheme_addallnew_rmdisk_rollback/tasks/redeploy.yml index c774c395..587e94b9 100644 --- a/redeploy/_scheme_addallnew_rmdisk_rollback/tasks/redeploy.yml +++ b/redeploy/_scheme_addallnew_rmdisk_rollback/tasks/redeploy.yml @@ -9,7 +9,7 @@ - name: Change lifecycle_state label from 'current' to 'retiring' include_role: name: clusterverse/redeploy/__common - tasks_from: set_lifecycle_state_label.yml + tasks_from: "set_lifecycle_state_label_{{cluster_vars.type}}.yml" vars: hosts_to_relabel: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state=='current']\") }}" new_state: "retiring" @@ -78,9 +78,10 @@ - name: Power off old VMs include_role: name: clusterverse/redeploy/__common - tasks_from: poweroff_vms.yml + tasks_from: "powerchange_vms_{{cluster_vars.type}}.yml" vars: - hosts_to_stop: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state=='retiring']\") }}" + hosts_to_powerchange: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state=='retiring']\") }}" + powerchange_new_state: "stop" - name: re-acquire cluster_hosts_target and cluster_hosts_state (for tidy) import_role: diff --git a/redeploy/_scheme_addallnew_rmdisk_rollback/tasks/rescue.yml b/redeploy/_scheme_addallnew_rmdisk_rollback/tasks/rescue.yml index 0314c19b..84882e10 100644 --- a/redeploy/_scheme_addallnew_rmdisk_rollback/tasks/rescue.yml +++ b/redeploy/_scheme_addallnew_rmdisk_rollback/tasks/rescue.yml @@ -11,7 +11,7 @@ - name: rescue | Change lifecycle_state label from 'current' to 'redeployfail' include_role: name: clusterverse/redeploy/__common - tasks_from: set_lifecycle_state_label.yml + tasks_from: "set_lifecycle_state_label_{{cluster_vars.type}}.yml" vars: hosts_to_relabel: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state=='current']\") }}" new_state: "redeployfail" @@ -20,7 +20,7 @@ - name: rescue | Change lifecycle_state label from 'retiring' to 'current' state include_role: name: clusterverse/redeploy/__common - tasks_from: set_lifecycle_state_label.yml + tasks_from: "set_lifecycle_state_label_{{cluster_vars.type}}.yml" vars: hosts_to_relabel: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state=='retiring']\") }}" new_state: "current" @@ -57,8 +57,8 @@ - name: rescue | poweroff the failed VMs include_role: name: clusterverse/redeploy/__common - tasks_from: poweroff_vms.yml - when: hosts_to_stop | length + tasks_from: "powerchange_vms_{{cluster_vars.type}}.yml" + when: hosts_to_powerchange | length vars: - hosts_to_stop: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state=='redeployfail']\") }}" - + hosts_to_powerchange: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state=='redeployfail']\") }}" + powerchange_new_state: "stop" diff --git a/redeploy/_scheme_addnewvm_rmdisk_rollback/tasks/main.yml b/redeploy/_scheme_addnewvm_rmdisk_rollback/tasks/main.yml index 8556058b..600cdee0 100644 --- a/redeploy/_scheme_addnewvm_rmdisk_rollback/tasks/main.yml +++ 
b/redeploy/_scheme_addnewvm_rmdisk_rollback/tasks/main.yml @@ -12,9 +12,9 @@ - name: Redeploy by hosttype; rollback on fail block: - name: Change lifecycle_state label from 'current' to 'retiring' - import_role: + include_role: name: clusterverse/redeploy/__common - tasks_from: set_lifecycle_state_label.yml + tasks_from: "set_lifecycle_state_label_{{cluster_vars.type}}.yml" vars: hosts_to_relabel: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state=='current']\") }}" new_state: "retiring" @@ -41,18 +41,21 @@ name: "{{predeleterole}}" when: predeleterole is defined and predeleterole != "" vars: - hosts_to_remove: "{{ hosts_to_stop | json_query(\"[?contains('RUNNING,running,poweredOn', instance_state)]\") }}" + hosts_to_remove: "{{ hosts_to_change | json_query(\"[?contains('RUNNING,running,poweredOn', instance_state)]\") }}" - name: Power off any other retiring VM(s) that might exist if we're redeploying to a smaller topology. include_role: name: clusterverse/redeploy/__common - tasks_from: poweroff_vms.yml + tasks_from: "powerchange_vms_{{cluster_vars.type}}.yml" + vars: + hosts_to_powerchange: "{{ hosts_to_change }}" + powerchange_new_state: "stop" vars: - hosts_to_stop: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state=='retiring' && ('\"+ myhosttypes|default('') + \"' == '' || contains(['\"+ myhosttypes|default('') + \"'], tagslabels.hosttype))]\") }}" + hosts_to_change: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state=='retiring' && ('\"+ myhosttypes|default('') + \"' == '' || contains(['\"+ myhosttypes|default('') + \"'], tagslabels.hosttype))]\") }}" when: (canary=="finish" or canary=="none") - name: re-acquire cluster_hosts_target and cluster_hosts_state (for tidy - can't be in the tidy block because the block depends on this info being correct) - import_role: + include_role: name: clusterverse/cluster_hosts when: (canary_tidy_on_success is defined and canary_tidy_on_success|bool) @@ -71,14 +74,14 @@ block: - assert: { that: "'current' in (cluster_hosts_state | map(attribute='tagslabels.lifecycle_state'))", msg: "ERROR - Cannot tidy when there are no machines in the 'current' lifecycle_state. Please use '-e clean=_all_'." 
       }
 
-  - import_role:
+  - include_role:
       name: clusterverse/clean
-      tasks_from: clean_dns.yml
+      tasks_from: dns.yml
     when: (hosts_to_clean | length) and (cluster_vars.dns_server is defined and cluster_vars.dns_server != "") and (cluster_vars.dns_user_domain is defined and cluster_vars.dns_user_domain != "")
 
-  - import_role:
+  - include_role:
       name: clusterverse/clean
-      tasks_from: clean_vms.yml
+      tasks_from: "{{cluster_vars.type}}.yml"
     when: (hosts_to_clean | length)
 
   - debug:
diff --git a/redeploy/_scheme_addnewvm_rmdisk_rollback/tasks/redeploy_by_hosttype_by_host.yml b/redeploy/_scheme_addnewvm_rmdisk_rollback/tasks/redeploy_by_hosttype_by_host.yml
index e1cc40ca..cd185693 100644
--- a/redeploy/_scheme_addnewvm_rmdisk_rollback/tasks/redeploy_by_hosttype_by_host.yml
+++ b/redeploy/_scheme_addnewvm_rmdisk_rollback/tasks/redeploy_by_hosttype_by_host.yml
@@ -30,9 +30,10 @@
 - name: Power off old VMs
   include_role:
     name: clusterverse/redeploy/__common
-    tasks_from: poweroff_vms.yml
+    tasks_from: "powerchange_vms_{{cluster_vars.type}}.yml"
   vars:
-    hosts_to_stop: "{{ hosts_to_remove }}"
+    hosts_to_powerchange: "{{ hosts_to_remove }}"
+    powerchange_new_state: "stop"
 
 - name: re-acquire the dynamic inventory
   include_role:
diff --git a/redeploy/_scheme_addnewvm_rmdisk_rollback/tasks/rescue.yml b/redeploy/_scheme_addnewvm_rmdisk_rollback/tasks/rescue.yml
index 5f5e1a7f..f7e4f235 100644
--- a/redeploy/_scheme_addnewvm_rmdisk_rollback/tasks/rescue.yml
+++ b/redeploy/_scheme_addnewvm_rmdisk_rollback/tasks/rescue.yml
@@ -7,9 +7,10 @@
 - name: rescue | Power-on the 'retiring' VMs
   include_role:
     name: clusterverse/redeploy/__common
-    tasks_from: poweron_vms.yml
+    tasks_from: "powerchange_vms_{{cluster_vars.type}}.yml"
   vars:
-    hosts_to_start: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state=='retiring']\") }}"
+    hosts_to_powerchange: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state=='retiring']\") }}"
+    powerchange_new_state: "start"
 
 - name: rescue | re-acquire cluster_hosts_target and cluster_hosts_state
   import_role:
@@ -18,7 +19,7 @@
 - name: rescue | Change lifecycle_state label from 'current' to 'redeployfail'
   include_role:
     name: clusterverse/redeploy/__common
-    tasks_from: set_lifecycle_state_label.yml
+    tasks_from: "set_lifecycle_state_label_{{cluster_vars.type}}.yml"
   vars:
     hosts_to_relabel: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state=='current']\") }}"
     new_state: "redeployfail"
@@ -26,7 +27,7 @@
 - name: rescue | Change lifecycle_state label from 'retiring' to 'current'
   include_role:
     name: clusterverse/redeploy/__common
-    tasks_from: set_lifecycle_state_label.yml
+    tasks_from: "set_lifecycle_state_label_{{cluster_vars.type}}.yml"
   vars:
     hosts_to_relabel: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state=='retiring']\") }}"
     new_state: "current"
@@ -61,6 +62,7 @@
 - name: rescue | Power-off the VMs
   include_role:
     name: clusterverse/redeploy/__common
-    tasks_from: poweroff_vms.yml
+    tasks_from: "powerchange_vms_{{cluster_vars.type}}.yml"
   vars:
-    hosts_to_stop: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state=='redeployfail']\") }}"
+    hosts_to_powerchange: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state=='redeployfail']\") }}"
+    powerchange_new_state: "stop"
diff --git a/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/_add_src_diskinfo_to_cluster_hosts_target__aws.yml b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/_add_src_diskinfo_to_cluster_hosts_target__aws.yml
index ed310e25..eb998768 100644
--- a/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/_add_src_diskinfo_to_cluster_hosts_target__aws.yml
+++ b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/_add_src_diskinfo_to_cluster_hosts_target__aws.yml
@@ -1,27 +1,14 @@
 ---
-- name: _get_diskinfo_aws | ec2_instance_info
-  ec2_instance_info:
-    filters:
-      "instance-state-name": ["running", "stopped"]
-      "tag:cluster_name": "{{cluster_name}}"
-    aws_access_key: "{{cluster_vars[buildenv].aws_access_key}}"
-    aws_secret_key: "{{cluster_vars[buildenv].aws_secret_key}}"
-    region: "{{cluster_vars.region}}"
-  register: r__ec2_instance_info
-
-- name: _get_diskinfo_aws | r__ec2_instance_info
-  debug: msg={{r__ec2_instance_info}}
-
-- name: _get_diskinfo_aws | augment cluster_hosts_target auto_volumes with source disk info
+- name: _add_src_diskinfo_to_cluster_hosts_target/aws | augment cluster_hosts_target auto_volumes with source disk info
   set_fact:
     cluster_hosts_target: |
       {%- for cht_host in cluster_hosts_target -%}
         {%- for cht_autovol in cht_host.auto_volumes -%}
-          {%- for ec2_instance_info_result in r__ec2_instance_info.instances | selectattr('tags.lifecycle_state', '!=', 'current') -%}
-            {%- if cht_host.hostname | regex_replace('-(?!.*-).*') == ec2_instance_info_result.tags.Name | regex_replace('-(?!.*-).*') -%}
-              {%- for chs_host_diskinfo in ec2_instance_info_result.block_device_mappings | selectattr('device_name', '==', cht_autovol.device_name) | selectattr('device_name', '!=', '/dev/sda1') -%}
-                {%- set _ = cht_autovol.update({'src': {'instance_id': ec2_instance_info_result.instance_id, 'device_name': chs_host_diskinfo.device_name, 'volume_id': chs_host_diskinfo.ebs.volume_id }}) -%}
+          {%- for chs_host in cluster_hosts_state | selectattr('tagslabels.lifecycle_state', '!=', 'current') -%}
+            {%- if cht_host.hostname | regex_replace('-(?!.*-).*') == chs_host.tagslabels.Name | regex_replace('-(?!.*-).*') -%}
+              {%- for chs_host_diskinfo in chs_host.disk_info_cloud | selectattr('device_name', '==', cht_autovol.device_name) | selectattr('device_name', '!=', '/dev/sda1') -%}
+                {%- set _ = cht_autovol.update({'src': {'instance_id': chs_host.instance_id, 'device_name': chs_host_diskinfo.device_name, 'volume_id': chs_host_diskinfo.ebs.volume_id }}) -%}
               {%- endfor -%}
             {%- endif -%}
           {%- endfor -%}
@@ -29,5 +16,5 @@
       {%- endfor -%}
       {{cluster_hosts_target}}
 
-- name: _get_diskinfo_aws | cluster_hosts_target
+- name: _add_src_diskinfo_to_cluster_hosts_target/aws | cluster_hosts_target
   debug: msg={{cluster_hosts_target}}
diff --git a/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/_add_src_diskinfo_to_cluster_hosts_target__gcp.yml b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/_add_src_diskinfo_to_cluster_hosts_target__gcp.yml
index 5b278a9f..0176e933 100644
--- a/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/_add_src_diskinfo_to_cluster_hosts_target__gcp.yml
+++ b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/_add_src_diskinfo_to_cluster_hosts_target__gcp.yml
@@ -1,32 +1,17 @@
 ---
-- name: _get_diskinfo_gcp | Get existing GCE instance info (per AZ)
-  gcp_compute_instance_info:
-    zone: "{{cluster_vars.region}}-{{item}}"
-    filters:
-      - "labels.cluster_name = {{cluster_name}}"
-    project: "{{cluster_vars[buildenv].vpc_project_id}}"
-    auth_kind: "serviceaccount"
-    service_account_file: "{{gcp_credentials_file}}"
-    scopes: ["https://www.googleapis.com/auth/compute.readonly"]
-  with_items: "{{ cluster_vars[buildenv].hosttype_vars | json_query(\"*[vms_by_az][][keys(@)][][]\") | unique }}"
-  register: r__gcp_compute_instance_info
-
-- name: _get_diskinfo_gcp | r__gcp_compute_instance_info.results
-  debug: msg={{r__gcp_compute_instance_info.results}}
-
-- name: _get_diskinfo_gcp | augment/update cluster_hosts_target auto_volumes with source disk info
+- name: _add_src_diskinfo_to_cluster_hosts_target/gcp | augment/update cluster_hosts_target auto_volumes with source disk info
   set_fact:
     cluster_hosts_target: |
       {%- for cht_host in cluster_hosts_target -%}
         {%- for cht_autovol in cht_host.auto_volumes -%}
-          {%- for gcp_compute_instance_result in r__gcp_compute_instance_info.results | json_query('[].resources[?labels.lifecycle_state != "current"][]') -%}
-            {%- if cht_host.hostname | regex_replace('-(?!.*-).*') == gcp_compute_instance_result.name | regex_replace('-(?!.*-).*') -%}
-              {%- for gcp_compute_instance_diskinfo in gcp_compute_instance_result.disks -%}
-                {%- if cht_autovol.initialize_params.disk_name | regex_replace('(.*)-.*(--.*)', '\\1\\2') == gcp_compute_instance_diskinfo.source | basename | regex_replace('(.*)-.*(--.*)', '\\1\\2') -%}
-                  {%- set _ = cht_autovol.update({'device_name': gcp_compute_instance_diskinfo.source | basename}) -%}
-                  {%- set _ = cht_autovol.update({'src': {'hostname': gcp_compute_instance_result.name, 'device_name': cht_autovol.device_name, 'source_url': gcp_compute_instance_diskinfo.source }}) -%}
-                  {%- set _ = cht_autovol.update({'initialize_params': {'disk_name': cht_autovol.device_name, 'disk_size_gb': gcp_compute_instance_diskinfo.diskSizeGb}}) -%}
+          {%- for chs_host in cluster_hosts_state | json_query('[?tagslabels.lifecycle_state != "current"]') -%}
+            {%- if cht_host.hostname | regex_replace('-(?!.*-).*') == chs_host.name | regex_replace('-(?!.*-).*') -%}
+              {%- for chs_host_diskinfo in chs_host.disk_info_cloud -%}
+                {%- if cht_autovol.initialize_params.disk_name | regex_replace('(.*)-.*(--.*)', '\\1\\2') == chs_host_diskinfo.source | basename | regex_replace('(.*)-.*(--.*)', '\\1\\2') -%}
+                  {%- set _ = cht_autovol.update({'device_name': chs_host_diskinfo.source | basename}) -%}
+                  {%- set _ = cht_autovol.update({'src': {'hostname': chs_host.name, 'device_name': cht_autovol.device_name, 'source_url': chs_host_diskinfo.source }}) -%}
+                  {%- set _ = cht_autovol.update({'initialize_params': {'disk_name': cht_autovol.device_name, 'disk_size_gb': chs_host_diskinfo.diskSizeGb}}) -%}
                 {%- endif -%}
               {%- endfor -%}
             {%- endif -%}
@@ -35,5 +20,5 @@
       {%- endfor -%}
       {{cluster_hosts_target}}
 
-- name: _get_diskinfo_gcp | cluster_hosts_target
+- name: _add_src_diskinfo_to_cluster_hosts_target/gcp | cluster_hosts_target
   debug: msg={{cluster_hosts_target}}
diff --git a/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/by_hosttype_by_host.yml b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/by_hosttype_by_host.yml
index 33719ddb..cf93d87b 100644
--- a/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/by_hosttype_by_host.yml
+++ b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/by_hosttype_by_host.yml
@@ -8,16 +8,20 @@
   include_role:
     name: "{{predeleterole}}"
   vars:
-    hosts_to_remove: "{{ hosts_to_stop }}"
+    hosts_to_remove: "{{ hosts_to_change }}"
   when: predeleterole is defined and predeleterole != ""
 
 - name: by_hosttype_by_host | Power off old VM
   include_role:
     name: clusterverse/redeploy/__common
-    tasks_from: poweroff_vms.yml
+    tasks_from: "powerchange_vms_{{cluster_vars.type}}.yml"
+    vars:
+      hosts_to_powerchange: "{{ hosts_to_change }}"
+      powerchange_new_state: "stop"
+
   vars:
     _host_to_redeploy_nosuffix: "{{host_to_redeploy.hostname | regex_replace('-(?!.*-).*')}}" #Remove the cluster_suffix from the hostname
-    hosts_to_stop: "{{ cluster_hosts_state | to_json | from_json | json_query(\"[?tagslabels.lifecycle_state!='current' && starts_with(name, '\" + _host_to_redeploy_nosuffix + \"')]\") }}"
+    hosts_to_change: "{{ cluster_hosts_state | to_json | from_json | json_query(\"[?tagslabels.lifecycle_state!='current' && starts_with(name, '\" + _host_to_redeploy_nosuffix + \"')]\") }}"
 
 - name: "by_hosttype_by_host | Run {{mainclusteryml}} to add {{host_to_redeploy.hostname}} to cluster"
   shell: "{{ (argv | join(' ')) | regex_replace('redeploy.yml', mainclusteryml) }} -e cluster_suffix={{cluster_suffix}} -e '{'cluster_hosts_target': [{{host_to_redeploy | to_json}}]}'"
@@ -29,22 +33,23 @@
   when: r__mainclusteryml is failed or (debug_nested_log_output is defined and debug_nested_log_output|bool)
 
 - name: by_hosttype_by_host | re-acquire cluster_hosts_state (NOT cluster_hosts_target, as we are augmenting this in _add_src_diskinfo_to_cluster_hosts_target__ (also, it is not affected by change of state))
-  import_role:
+  include_role:
     name: clusterverse/cluster_hosts
-    tasks_from: get_cluster_hosts_state.yml
+    tasks_from: "get_cluster_hosts_state_{{cluster_vars.type}}.yml"
 
 - name: by_hosttype_by_host | Power on new VM (not needed for normal redeploy, but for rescue case)
   include_role:
     name: clusterverse/redeploy/__common
-    tasks_from: poweron_vms.yml
+    tasks_from: "powerchange_vms_{{cluster_vars.type}}.yml"
   vars:
-    hosts_to_start: "{{ cluster_hosts_state | selectattr('name', '==', host_to_redeploy.hostname) | list }}"
+    hosts_to_powerchange: "{{ cluster_hosts_state | selectattr('name', '==', host_to_redeploy.hostname) | list }}"
+    powerchange_new_state: "start"
 
 - name: by_hosttype_by_host | re-acquire the dynamic inventory
   include_role:
     name: clusterverse/dynamic_inventory
 
 - name: by_hosttype_by_host | re-acquire cluster_hosts_state (NOT cluster_hosts_target, as we are augmenting this in _add_src_diskinfo_to_cluster_hosts_target__ (also, it is not affected by change of state))
-  import_role:
+  include_role:
     name: clusterverse/cluster_hosts
-    tasks_from: get_cluster_hosts_state.yml
+    tasks_from: "get_cluster_hosts_state_{{cluster_vars.type}}.yml"
diff --git a/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/main.yml b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/main.yml
index f95d7820..2b16a089 100644
--- a/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/main.yml
+++ b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/main.yml
@@ -10,7 +10,7 @@
 - name: Change lifecycle_state label from 'current' to 'retiring'
   include_role:
     name: clusterverse/redeploy/__common
-    tasks_from: set_lifecycle_state_label.yml
+    tasks_from: "set_lifecycle_state_label_{{cluster_vars.type}}.yml"
   vars:
     hosts_to_relabel: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state=='current']\") }}"
     new_state: "retiring"
@@ -55,7 +55,7 @@
 - name: rescue | Change lifecycle_state label from 'current' to 'redeployfail'
   include_role:
     name: clusterverse/redeploy/__common
-    tasks_from: set_lifecycle_state_label.yml
+    tasks_from: "set_lifecycle_state_label_{{cluster_vars.type}}.yml"
   vars:
     hosts_to_relabel: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state=='current']\") }}"
     new_state: "redeployfail"
@@ -63,7 +63,7 @@
 - name: rescue | Change lifecycle_state label from 'retiring' to 'current'
   include_role:
     name: clusterverse/redeploy/__common
-    tasks_from: set_lifecycle_state_label.yml
+    tasks_from: "set_lifecycle_state_label_{{cluster_vars.type}}.yml"
   vars:
     hosts_to_relabel: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state=='retiring']\") }}"
     new_state: "current"
@@ -99,12 +99,12 @@
 - include_role:
     name: clusterverse/clean
-    tasks_from: clean_dns.yml
+    tasks_from: dns.yml
   when: (hosts_to_clean | length) and (cluster_vars.dns_server is defined and cluster_vars.dns_server != "") and (cluster_vars.dns_user_domain is defined and cluster_vars.dns_user_domain != "")
 
 - include_role:
     name: clusterverse/clean
-    tasks_from: clean_vms.yml
+    tasks_from: "{{cluster_vars.type}}.yml"
   when: (hosts_to_clean | length)
 
 - debug:
diff --git a/redeploy/_scheme_rmvm_rmdisk_only/tasks/by_hosttype_by_host.yml b/redeploy/_scheme_rmvm_rmdisk_only/tasks/by_hosttype_by_host.yml
index 4573b744..bb2b3645 100644
--- a/redeploy/_scheme_rmvm_rmdisk_only/tasks/by_hosttype_by_host.yml
+++ b/redeploy/_scheme_rmvm_rmdisk_only/tasks/by_hosttype_by_host.yml
@@ -9,9 +9,9 @@
     hosts_to_remove: "{{ cluster_hosts_state | json_query(\"[?name==`\" + host_to_del.hostname + \"`]\") }}"
   when: predeleterole is defined and predeleterole != ""
 
-- import_role:
+- include_role:
     name: clusterverse/clean
-    tasks_from: clean_vms.yml
+    tasks_from: "{{cluster_vars.type}}.yml"
   vars:
     hosts_to_clean: "{{ cluster_hosts_state | json_query(\"[?name==`\" + host_to_del.hostname + \"`]\") }}"
diff --git a/redeploy/meta/main.yml b/redeploy/meta/main.yml
index 1773f5c9..089003f4 100644
--- a/redeploy/meta/main.yml
+++ b/redeploy/meta/main.yml
@@ -1,5 +1,5 @@
 ---
 dependencies:
-  - role: '_dependencies'
+  - role: 'dynamic_inventory'
  - role: 'cluster_hosts'
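Note: the bodies of the new `redeploy/__common/tasks/powerchange_vms_*.yml` files (added by this patch) are not part of this excerpt. They replace the old `poweroff_vms.yml`/`poweron_vms.yml` pair behind a single interface: callers pass `hosts_to_powerchange` and `powerchange_new_state` ("start" or "stop"), as seen in the include_role changes above. A minimal sketch of what the AWS variant might contain (the module arguments here are assumptions, not taken from this patch):

```
---
# Hypothetical sketch of powerchange_vms_aws.yml - illustrative only; the real file
# is created by this patch but its contents are not shown in this excerpt.
- name: powerchange_vms_aws | Change power state of VMs
  ec2_instance:
    aws_access_key: "{{ cluster_vars[buildenv].aws_access_key }}"
    aws_secret_key: "{{ cluster_vars[buildenv].aws_secret_key }}"
    region: "{{ cluster_vars.region }}"
    instance_ids: "{{ hosts_to_powerchange | json_query('[].instance_id') }}"    # cluster_hosts_state entries carry instance_id on AWS
    state: "{{ 'running' if powerchange_new_state == 'start' else 'stopped' }}"
  when: hosts_to_powerchange | length > 0
```

Consolidating power-on and power-off into one task file per cloud is what lets every scheme above switch on `cluster_vars.type` instead of branching inline.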
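Similarly, `set_lifecycle_state_label_aws.yml` / `set_lifecycle_state_label_gcp.yml` are the cloud-specific replacements for the old `set_lifecycle_state_label.yml`; callers supply `hosts_to_relabel` and `new_state`. A hedged sketch of the AWS variant (the use of `ec2_tag` is an assumption for illustration):

```
---
# Hypothetical sketch of set_lifecycle_state_label_aws.yml - illustrative only.
- name: set_lifecycle_state_label_aws | Set the lifecycle_state tag on each host
  ec2_tag:
    aws_access_key: "{{ cluster_vars[buildenv].aws_access_key }}"
    aws_secret_key: "{{ cluster_vars[buildenv].aws_secret_key }}"
    region: "{{ cluster_vars.region }}"
    resource: "{{ item.instance_id }}"
    tags:
      lifecycle_state: "{{ new_state }}"
  with_items: "{{ hosts_to_relabel }}"
  when: hosts_to_relabel | length > 0
```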